library(knitr)
knitr::opts_chunk$set(cache = TRUE, warning = FALSE,
message = FALSE, cache.lazy = FALSE, cache.path = "../../gitignore/RmdCache/", # keep heavy data in gitignore cache
fig.path = "Rmdfig/")
machine="mythinkpad" # define the machine we work on
loadALL = TRUE # load all uniteCov objects
loadannot = TRUE # load genome annotations
sourceDMS = TRUE # Load the calculated DMS
sourceSubUnite = FALSE
source("R02.3_DATALOAD.R")
source("homebrewDMSannotation.R") # needed for annotation, slight modification of genomation
## So far, we don't use DMR (because RRBS single end, this is not super meaningful)
rm(DMRBPlist, DMRlist)
Data getting loaded:
Compare fitness traits between the different offsprings groups. Follow up of Sagonas 2020 & Ferre Ortega’s master’s dissertation
Kaufmann et al. 2014: Body condition of the G2 fish, an estimate of fish health and a predictor of energy reserves and reproductive success, was calculated using there residuals from the regression of body mass on body length (Chellappaet al.1995).
fullMetadata_OFFS$BCI <- residuals(lmer(Wnettofin ~ Slfin * Sex + (1|brotherPairID), data=fullMetadata_OFFS))
## and for parents (no sex difference, only males):
fullMetadata_PAR$BCI <- residuals(lmer(Wnettofin ~ Slfin + (1|brotherPairID), data=fullMetadata_PAR))
Effect of paternal treatment on body condition of offspring: Kaufmann et al. 2014: “To investigate in which way paternal G1 exposure affected offspring tolerance, we tested how the relationship between G2 body condition and infection intensity was affected by paternal G1 exposure. This was tested in a linear mixed model on G2 body condition with paternal G1 treatment and the interaction between paternal G1 treatment and G2 infection intensity as fixed effects. Maternal half-sibship identity was set as a random effect”
Effect of treatment groups of offspring on body condition(Kaufmann et al. 2014): “The linear mixed effect model (nlme function in R) included G2 body condition as dependent variable, sex, G2 treatment (exposed vs. control), paternal G1 treatment (exposed vs. control) and their interactions as fixed effects as well as maternal G2 half-sibship identity as a random effect”
mod1 <- lme(BCI ~ offsTrt * patTrt, random=~1|brotherPairID,data=fullMetadata_OFFS)
anova(mod1) # strong significant effect of both offspring trt & paternal + interactions
## numDF denDF F-value p-value
## (Intercept) 1 100 0.000000 1.0000
## offsTrt 1 100 14.057746 0.0003
## patTrt 1 100 13.600342 0.0004
## offsTrt:patTrt 1 100 9.396573 0.0028
mod1.2 <- lme(BCI ~ trtG1G2, random=~1|brotherPairID,data=fullMetadata_OFFS)
## pairwise posthoc test
emmeans(mod1.2, list(pairwise ~ trtG1G2), adjust = "tukey")
## $`emmeans of trtG1G2`
## trtG1G2 emmean SE df lower.CL upper.CL
## NE_control 16.6 10.8 7 -8.87 42.0
## NE_exposed -57.7 11.0 7 -83.63 -31.8
## E_control 23.6 10.8 7 -1.85 49.0
## E_exposed 15.5 10.8 7 -9.90 41.0
##
## Degrees-of-freedom method: containment
## Confidence level used: 0.95
##
## $`pairwise differences of trtG1G2`
## 1 estimate SE df t.ratio p.value
## NE_control - NE_exposed 74.29 15.3 100 4.840 <.0001
## NE_control - E_control -7.03 15.2 100 -0.462 0.9671
## NE_control - E_exposed 1.03 15.2 100 0.067 0.9999
## NE_exposed - E_control -81.32 15.3 100 -5.298 <.0001
## NE_exposed - E_exposed -73.27 15.3 100 -4.773 <.0001
## E_control - E_exposed 8.05 15.2 100 0.529 0.9517
##
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 4 estimates
## Control father - treatment offspring has a strongly significantly lower BC than
## every other group, same as Kaufmann et al. 2014
myplot1 <- ggplot(fullMetadata_OFFS, aes(x=trtG1G2, y = BCI, fill=trtG1G2))+
geom_boxplot()+
geom_signif(comparisons = list(c("NE_control", "NE_exposed")),
map_signif_level=TRUE, annotations="***",
y_position = 150, tip_length = 0, vjust=0.4) +
geom_signif(comparisons = list(c("NE_exposed", "E_control")),
map_signif_level=TRUE, annotations="***",
y_position = 200, tip_length = 0, vjust=0.4) +
geom_signif(comparisons = list(c("NE_exposed", "E_exposed")),
map_signif_level=TRUE, annotations="***",
y_position = 250, tip_length = 0, vjust=0.4) +
scale_fill_manual(values = colOffs)+
theme_bw() + theme(legend.position = "none") +
ylab("Body Condition Index") +
scale_x_discrete(labels=c("NE_control" = "G1 control\nG2 control", "NE_exposed" = "G1 control\nG2 infected",
"E_control" = "G1 infected\nG2 control", "E_exposed" = "G1 infected\nG2 infected"),
name = NULL)
myplot1
mod_Tol <- lmer(BCI ~ No.Worms*PAT + (1|brotherPairID)+ (1|Sex), data=fullMetadata_OFFS, REML = F)
## Model selection:
step(mod_Tol, reduce.random = F) # Model found: full model
## Backward reduced random-effect table:
##
## Eliminated npar logLik AIC LRT Df Pr(>Chisq)
## <none> 7 -608.33 1230.7
## (1 | brotherPairID) 0 6 -608.33 1228.7 0 1 1
## (1 | Sex) 0 6 -608.33 1228.7 0 1 1
##
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite
##
## Eliminated Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## No.Worms:PAT 0 20660 20660 1 111 6.1283 0.01481 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Model found:
## BCI ~ No.Worms * PAT + (1 | brotherPairID) + (1 | Sex)
## The slope of BCI on nbrworms varies upon treatment
plot(ggpredict(mod_Tol, terms = c("No.Worms", "PAT")), add.data=T)+ theme_bw() +
ylab("Body Condition Index") + xlab("Number of worms") +
ggtitle("Predicted values of Body Condition Index in offspring")+
scale_color_manual(NULL, values = c("black", "red")) +
scale_fill_manual(NULL, values = c("black", "red")) +
scale_x_continuous(breaks = 0:10)+
geom_point(size=0)+ # to have color key in legend as point
guides(colour = guide_legend(override.aes = list(size=3,linetype=0, fill = NA)))
Calculate number of methylated sites, mean coverage, and residuals of methylated sites by covered sites (to account for coverage bias)
mycalcRMS <- function(myUniteCov, myMetaData){
percMethMat = methylKit::percMethylation(myUniteCov)
# create a dataframe with all info
percMethDF = data.frame(SampleID = colnames(percMethMat),
Nbr_methCpG = colSums(percMethMat>=70 & !is.na(percMethMat)), ## number of methylated sites
Nbr_coveredCpG = colSums(!is.na(percMethMat)), ## number of sites covered in this sample
Nbr_NOTcoveredCpG = colSums(is.na(percMethMat)),## number of sites NOT covered in this sample
MeanCoverage = colMeans(methylKit::getData(myUniteCov)[,myUniteCov@coverage.index], na.rm = T), ## coverage.index: vector denoting which columns in the data correspond to coverage values
OverallPercentageMethylation = colMeans(methylKit::percMethylation(myUniteCov), na.rm = T))
## RMS in this sample based on covered sites
percMethDF$RMS_coveredCpG = percMethDF$Nbr_methCpG / percMethDF$Nbr_coveredCpG
## merge with original metadata:
myMetaData = merge(myMetaData, percMethDF)
# calculate also RMS global, considering CpG covered or not (to compare)
myMetaData$RMS_allCpG_coveredOrNot = myMetaData$Nbr_methCpG / (myMetaData$M.Seqs_rawReads*10e6)
# calculate residuals of nbr of methCpG by nbr of covered CpG
myMetaData$res_Nbr_methCpG_Nbr_coveredCpG = residuals(
lm(myMetaData$Nbr_methCpG ~ myMetaData$Nbr_coveredCpG))
## REORDER myMetaData by sample ID
myMetaData = myMetaData[order(as.numeric(gsub("S", "", myMetaData$SampleID))),]
return(myMetaData)
}
fullMetadata <- mycalcRMS(uniteCovALL_woSexAndUnknowChr, fullMetadata)
fullMetadata_PAR <- mycalcRMS(uniteCov6_G1_woSexAndUnknowChrOVERLAP, fullMetadata_PAR)
fullMetadata_OFFS <- mycalcRMS(uniteCov14_G2_woSexAndUnknowChrOVERLAP, fullMetadata_OFFS)
We kept in total 135 samples. On average, 11.27 [+/- 0.33] million reads were sequenced. The average mapping efficiency was 85% [+/- 0.48%].
The mean coverage per CpG site in the full dataset, considering positions covered in all fish, is 83 [+/- 2.21].
For G1 only: We kept in total 24 samples. On average, 11.16 [+/- 0.67] million reads were sequenced. The average mapping efficiency was # 84% [+/- 1.39%].
The mean coverage per CpG site in G1, considering positions shared by at least 6 fish per treatment group (half individuals per group), and which overlap with positions retained in G2, is 46 [+/- 1.71].
For G2 only: We kept in total 111 samples. On average, 11.29 [+/- 0.38] million reads were sequenced. The average mapping efficiency was 86% [+/- 0.47%].
The mean coverage per CpG site in G2, considering positions shared by at least 14 fish per treatment group (half individuals per group), and which overlap with positions retained in G1, is 47 [+/- 0.76].
cor.test(fullMetadata_PAR$Nbr_coveredCpG,
fullMetadata_PAR$Nbr_methCpG, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: fullMetadata_PAR$Nbr_coveredCpG and fullMetadata_PAR$Nbr_methCpG
## S = 350, p-value = 2.153e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.8478261
## S = 350, p-value = 2.15e-06, rho = 0.85
ggplot(fullMetadata_PAR, aes(x=Nbr_coveredCpG, y=Nbr_methCpG))+
geom_smooth(method = "lm", col="black")+
geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = c("grey", "red")) +
theme_bw() + ggtitle(label = "Parents, CpG shared by half fish/trt")
## Check after RMS correction for coverage bias: CORRECTED (p-value = 0.4485)
cor.test(fullMetadata_PAR$Nbr_coveredCpG,
fullMetadata_PAR$RMS_coveredCpG, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: fullMetadata_PAR$Nbr_coveredCpG and fullMetadata_PAR$RMS_coveredCpG
## S = 2712, p-value = 0.4006
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1791304
ggplot(fullMetadata_PAR, aes(x=Nbr_coveredCpG, y=RMS_coveredCpG))+
geom_smooth(method = "lm", col="black")+
geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = c("grey", "red")) +
theme_bw() + ggtitle(label = "Parents, CpG shared by half fish/trt")
## and with residuals: COMPLETELY CORRECTED p-value = 0.9562
cor.test(fullMetadata_PAR$Nbr_coveredCpG,
fullMetadata_PAR$res_Nbr_methCpG_Nbr_coveredCpG, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: fullMetadata_PAR$Nbr_coveredCpG and fullMetadata_PAR$res_Nbr_methCpG_Nbr_coveredCpG
## S = 2308, p-value = 0.9886
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.003478261
ggplot(fullMetadata_PAR, aes(x=Nbr_coveredCpG, y=res_Nbr_methCpG_Nbr_coveredCpG))+
geom_smooth(method = "lm", col="black")+
geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = c("grey", "red")) +
theme_bw() + ggtitle(label = "Parents, CpG shared by half fish/trt")
############
## Offspring:
cor.test(fullMetadata_OFFS$Nbr_coveredCpG,
fullMetadata_OFFS$Nbr_methCpG, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: fullMetadata_OFFS$Nbr_coveredCpG and fullMetadata_OFFS$Nbr_methCpG
## S = 20254, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.9111355
## S = 20254, p-value < 2.2e-16 rho = 0.91
ggplot(fullMetadata_OFFS, aes(x=Nbr_coveredCpG, y=Nbr_methCpG))+
geom_smooth(method = "lm", col="black")+
geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = colOffs) +
scale_x_continuous("Number of cytosines covered") +
scale_y_continuous("Number of methylated cytosines") +
theme_bw() + ggtitle(label = "Offspring, CpG shared by half fish/trt")
## Plot distance to residuals:
fit <- lm(Nbr_methCpG ~ Nbr_coveredCpG, data = fullMetadata_OFFS)
plotdf <- fullMetadata_OFFS
plotdf$predicted <- predict(fit) # Save the predicted values
plotdf$residuals <- residuals(fit)
ggplot(plotdf, aes(x=Nbr_coveredCpG, y=Nbr_methCpG))+
geom_smooth(method = "lm", col="black")+
geom_segment(aes(xend = Nbr_coveredCpG, yend = predicted), col = "grey") +
geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = colOffs) +
scale_x_continuous("Number of cytosines covered") +
scale_y_continuous("Number of methylated cytosines") +
theme_bw() + ggtitle(label = "Offspring, CpG shared by half fish/trt")
## Check after RMS correction for coverage bias: SEMI CORRECTED (p-value = 0.01, rho = -0.24)
cor.test(fullMetadata_OFFS$Nbr_coveredCpG,
fullMetadata_OFFS$RMS_coveredCpG, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: fullMetadata_OFFS$Nbr_coveredCpG and fullMetadata_OFFS$RMS_coveredCpG
## S = 282466, p-value = 0.01158
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.2393208
ggplot(fullMetadata_OFFS, aes(x=Nbr_coveredCpG, y=RMS_coveredCpG))+
geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = colOffs) +
geom_smooth(method = "lm", col="black")+
theme_bw() + ggtitle(label = "Offspring, CpG shared by half fish/trt")
## and with residuals: COMPLETELY CORRECTED p-value = 0.51
cor.test(fullMetadata_OFFS$Nbr_coveredCpG,
fullMetadata_OFFS$res_Nbr_methCpG_Nbr_coveredCpG, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: fullMetadata_OFFS$Nbr_coveredCpG and fullMetadata_OFFS$res_Nbr_methCpG_Nbr_coveredCpG
## S = 213674, p-value = 0.514
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.06250439
ggplot(fullMetadata_OFFS, aes(x=Nbr_coveredCpG, y=res_Nbr_methCpG_Nbr_coveredCpG))+
geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = colOffs) +
geom_smooth(method = "lm", col="black")+
scale_x_continuous("Number of cytosines covered") +
scale_y_continuous("Residuals of number of methylated cytosines\n on number of cytosines covered") +
theme_bw() + ggtitle(label = "Offspring, CpG shared by half fish/trt")
mod = lm(MappingEfficiency.BSBoldvsGynogen ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start: AIC=203.17
## MappingEfficiency.BSBoldvsGynogen ~ Sex
##
## Df Sum of Sq RSS AIC
## <none> 667.74 203.18
## - Sex 1 22.563 690.30 204.86
##
## Call:
## lm(formula = MappingEfficiency.BSBoldvsGynogen ~ Sex, data = fullMetadata_OFFS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9847 -1.5030 0.3133 2.0418 4.0823
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 86.3134 0.3337 258.624 <2e-16 ***
## SexM -0.9017 0.4699 -1.919 0.0576 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.475 on 109 degrees of freedom
## Multiple R-squared: 0.03269, Adjusted R-squared: 0.02381
## F-statistic: 3.683 on 1 and 109 DF, p-value: 0.05758
plot(ggpredict(mod, terms = c("Sex")), add.data = T)
NB: this is WITH unknown and sex chromosomes, before filtering.
mod = lm(M.Seqs_rawReads ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start: AIC=158.08
## M.Seqs_rawReads ~ Sex
##
## Df Sum of Sq RSS AIC
## - Sex 1 3.8569 448.66 157.04
## <none> 444.80 158.08
##
## Step: AIC=157.04
## M.Seqs_rawReads ~ 1
##
## Call:
## lm(formula = M.Seqs_rawReads ~ 1, data = fullMetadata_OFFS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4901 -1.1901 -0.2901 0.8599 8.8099
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.2901 0.1917 58.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.02 on 110 degrees of freedom
plot(ggpredict(mod, terms = c("Sex")), add.data = T)
NB: this is WITH unknown and sex chromosomes, before filtering.
mod = lm(MeanCoverage ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start: AIC=314.87
## MeanCoverage ~ Sex
##
## Df Sum of Sq RSS AIC
## - Sex 1 4.4425 1831.0 313.14
## <none> 1826.5 314.87
##
## Step: AIC=313.14
## MeanCoverage ~ 1
##
## Call:
## lm(formula = MeanCoverage ~ 1, data = fullMetadata_OFFS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.8662 -2.4123 -0.6565 1.7138 15.0594
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46.8858 0.3872 121.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.08 on 110 degrees of freedom
plot(ggpredict(mod, terms = c("Sex")), add.data = T)
NB: this is in G2, considering positions shared by at least 14 fish per treatment group (half individuals per group), and which overlap with positions retained in G1, without sex and unknown chromosome (after filtering)
mod = lm(Nbr_coveredCpG ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start: AIC=2579.96
## Nbr_coveredCpG ~ Sex
##
## Df Sum of Sq RSS AIC
## - Sex 1 1.9317e+10 1.3495e+12 2579.6
## <none> 1.3302e+12 2580.0
##
## Step: AIC=2579.56
## Nbr_coveredCpG ~ 1
##
## Call:
## lm(formula = Nbr_coveredCpG ~ 1, data = fullMetadata_OFFS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -338266 -58536 26350 82661 139983
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 853897 10513 81.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 110800 on 110 degrees of freedom
plot(ggpredict(mod, terms = c("Sex")), add.data = T)
NB: this is in G2, considering positions shared by at least 14 fish per treatment group (half individuals per group), and which overlap with positions retained in G1, without sex and unknown chromosome (after filtering)
mod = lm(OverallPercentageMethylation ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start: AIC=154.63
## OverallPercentageMethylation ~ Sex
##
## Df Sum of Sq RSS AIC
## <none> 431.18 154.63
## - Sex 1 12.428 443.61 155.78
##
## Call:
## lm(formula = OverallPercentageMethylation ~ Sex, data = fullMetadata_OFFS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9467 -1.2674 -0.4101 0.6060 10.3697
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.1035 0.2682 209.196 <2e-16 ***
## SexM -0.6692 0.3776 -1.772 0.0791 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.989 on 109 degrees of freedom
## Multiple R-squared: 0.02801, Adjusted R-squared: 0.0191
## F-statistic: 3.142 on 1 and 109 DF, p-value: 0.07911
plot(ggpredict(mod, terms = c("Sex")), add.data = T)
NB: this is in G2, considering positions shared by at least 14 fish per treatment group (half individuals per group), and which overlap with positions retained in G1, without sex and unknown chromosome (after filtering)
mod = lm(res_Nbr_methCpG_Nbr_coveredCpG ~ Sex, data = fullMetadata_OFFS)
summary(step(mod)) # sex is significant p = 0.000157 ***
## Start: AIC=2165.93
## res_Nbr_methCpG_Nbr_coveredCpG ~ Sex
##
## Df Sum of Sq RSS AIC
## <none> 3.1917e+10 2165.9
## - Sex 1 4493411296 3.6411e+10 2178.6
##
## Call:
## lm(formula = res_Nbr_methCpG_Nbr_coveredCpG ~ Sex, data = fullMetadata_OFFS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38952 -10289 -519 10434 45882
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6420 2307 2.782 0.006361 **
## SexM -12726 3248 -3.917 0.000157 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17110 on 109 degrees of freedom
## Multiple R-squared: 0.1234, Adjusted R-squared: 0.1154
## F-statistic: 15.35 on 1 and 109 DF, p-value: 0.0001565
anova(mod)
## Analysis of Variance Table
##
## Response: res_Nbr_methCpG_Nbr_coveredCpG
## Df Sum Sq Mean Sq F value Pr(>F)
## Sex 1 4.4934e+09 4493411296 15.345 0.0001565 ***
## Residuals 109 3.1917e+10 292820190
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(ggpredict(mod, terms = c("Sex")), add.data = T) +
xlab(NULL)+
ylab("Residuals of N methylated sites on N covered sites") +
ggtitle("Predicted values of global methylation in offspring")
fullMetadata_OFFS$res_Nbr_methCpG_Nbr_coveredCpG_div1000 <- (fullMetadata_OFFS$res_Nbr_methCpG_Nbr_coveredCpG)/1000
mod_Tol.Meth <- lmer(BCI ~ res_Nbr_methCpG_Nbr_coveredCpG_div1000*No.Worms*PAT + (1|brotherPairID)+ (1|Sex),
data=fullMetadata_OFFS, REML = F)
## Model selection:
step(mod_Tol.Meth, reduce.random = F) # Model found: BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## Backward reduced random-effect table:
##
## Eliminated npar logLik AIC LRT Df Pr(>Chisq)
## <none> 11 -606.89 1235.8
## (1 | brotherPairID) 0 10 -606.89 1233.8 0 1 1
## (1 | Sex) 0 10 -606.89 1233.8 0 1 1
##
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite
##
## Eliminated Sum Sq Mean Sq
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms:PAT 1 2780.8 2780.8
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT 2 2396.3 2396.3
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms 3 1829.8 1829.8
## res_Nbr_methCpG_Nbr_coveredCpG_div1000 4 2571.6 2571.6
## No.Worms:PAT 0 20659.8 20659.8
## NumDF DenDF F value Pr(>F)
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms:PAT 1 111 0.8465 0.35953
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT 1 111 0.7240 0.39667
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms 1 111 0.5492 0.46020
## res_Nbr_methCpG_Nbr_coveredCpG_div1000 1 111 0.7681 0.38270
## No.Worms:PAT 1 111 6.1283 0.01481
##
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms:PAT
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms
## res_Nbr_methCpG_Nbr_coveredCpG_div1000
## No.Worms:PAT *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Model found:
## BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## The slope of BCI on nbrworms varies upon treatment but methylation does NOT vary with tolerance
mod_Tol.Meth <- lmer(BCI ~ No.Worms*PAT + (1|brotherPairID)+ (1|Sex),
data=fullMetadata_OFFS)
## And by treatment instead of No.worms?
mod_Tol.Meth2 <- lmer(BCI ~ res_Nbr_methCpG_Nbr_coveredCpG_div1000*PAT*outcome + (1|brotherPairID)+ (1|Sex),
data=fullMetadata_OFFS, REML = F)
## Model selection:
step(mod_Tol.Meth2, reduce.random = F) # Model found: BCI ~ PAT + outcome + (1 | brotherPairID) + (1 | Sex) + PAT:outcome
## Backward reduced random-effect table:
##
## Eliminated npar logLik AIC LRT Df Pr(>Chisq)
## <none> 11 -603.06 1228.1
## (1 | brotherPairID) 0 10 -603.06 1226.1 0 1 1
## (1 | Sex) 0 10 -603.06 1226.1 0 1 1
##
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite
##
## Eliminated Sum Sq Mean Sq
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT:outcome 1 2156.6 2156.6
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:outcome 2 494.6 494.6
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT 3 1034.5 1034.5
## res_Nbr_methCpG_Nbr_coveredCpG_div1000 4 2542.3 2542.3
## PAT:outcome 0 30432.9 30432.9
## NumDF DenDF F value Pr(>F)
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT:outcome 1 111 0.7034 0.403442
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:outcome 1 111 0.1603 0.689644
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT 1 111 0.3348 0.564008
## res_Nbr_methCpG_Nbr_coveredCpG_div1000 1 111 0.8203 0.367046
## PAT:outcome 1 111 9.7478 0.002289
##
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT:outcome
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:outcome
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT
## res_Nbr_methCpG_Nbr_coveredCpG_div1000
## PAT:outcome **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Model found:
## BCI ~ PAT + outcome + (1 | brotherPairID) + (1 | Sex) + PAT:outcome
The slope of BCI on nbr worms varies upon parental treatment, but methylation does NOT vary with tolerance
## By group, tolerance slope as a function of methylation residuals:
modFULL <- lmer(BCI ~ res_Nbr_methCpG_Nbr_coveredCpG_div1000*No.Worms + (1|brotherPairID) + (1|Sex),
data = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2 %in% c("NE_exposed", "E_exposed"),])
## Model selection:
step(modFULL, reduce.random = F) # Model found: BCI ~ (1 | brotherPairID) + (1 | Sex)
## Backward reduced random-effect table:
##
## Eliminated npar logLik AIC LRT Df Pr(>Chisq)
## <none> 7 -293.63 601.26
## (1 | brotherPairID) 0 6 -293.63 599.26 0.000000 1 1.0000
## (1 | Sex) 0 6 -293.66 599.32 0.067386 1 0.7952
##
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite
##
## Eliminated Sum Sq Mean Sq NumDF
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms 1 362.1 362.1 1
## No.Worms 2 577.4 577.4 1
## res_Nbr_methCpG_Nbr_coveredCpG_div1000 3 8726.5 8726.5 1
## DenDF F value Pr(>F)
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms 49.047 0.1098 0.7418
## No.Worms 51.973 0.1776 0.6752
## res_Nbr_methCpG_Nbr_coveredCpG_div1000 37.954 2.7327 0.1066
##
## Model found:
## BCI ~ (1 | brotherPairID) + (1 | Sex)
modFULL <- lmer(BCI ~ res_Nbr_methCpG_Nbr_coveredCpG_div1000*PAT + (1|brotherPairID) + (1|Sex),
data = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2 %in% c("NE_exposed", "E_exposed"),])
## Model selection:
step(modFULL, reduce.random = F) # Model found: BCI ~ PAT + (1 | brotherPairID) + (1 | Sex)
## Backward reduced random-effect table:
##
## Eliminated npar logLik AIC LRT Df Pr(>Chisq)
## <none> 7 -278.14 570.27
## (1 | brotherPairID) 0 6 -278.14 568.27 0.0000 1 1.0000
## (1 | Sex) 0 6 -278.93 569.86 1.5904 1 0.2073
##
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite
##
## Eliminated Sum Sq Mean Sq NumDF
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT 1 504 504 1
## res_Nbr_methCpG_Nbr_coveredCpG_div1000 2 1476 1476 1
## PAT 0 76908 76908 1
## DenDF F value Pr(>F)
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT 50.451 0.2624 0.6107
## res_Nbr_methCpG_Nbr_coveredCpG_div1000 51.749 0.7782 0.3818
## PAT 47.592 41.0412 6.195e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Model found:
## BCI ~ PAT + (1 | brotherPairID) + (1 | Sex)
# 1. get raw values
percmeth = percMethylation(uniteCov14_G2_woSexAndUnknowChrOVERLAP)
# Run PCA on complete data (CpG covered in all fish)
PCA_allpos <- myPCA(x = t(na.omit(percmeth)), incomplete = FALSE)
## [1] "The chosen model is:"
## BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## <environment: 0x55deb5366228>
We perform a PCA on 78384 CpG sites covered in all G2 individuals. We first perform a test on the complete dataset.
fviz_pca_ind(PCA_allpos$res.PCA, label="none", habillage=PCA_allpos$metadata$trtG1G2) +
scale_color_manual(values = colOffs)+
scale_shape_manual(values=c(19,19,19,19))
fviz_pca_ind(PCA_allpos$res.PCA, label="none", habillage=as.factor(PCA_allpos$metadata$brotherPairID))
# The function dimdesc() can be used to identify the most correlated variables with a given principal component.
mydimdesc <- dimdesc(PCA_allpos$res.PCA, axes = c(1,2), proba = 0.05)
There are 36212 CpG sites most correlated (p < 0.05) with the first principal component , and 15101 with the second principal component.
The 2 first PCA axes do not explain BCI (p<0.05)
# Percentage of variance explained by each factor:
formula(PCA_allpos$modSel) # BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## <environment: 0x55deb5366228>
mod_noworms = lmer(BCI ~ PAT + (1 | brotherPairID) + (1 | Sex), data = PCA_allpos$metadata)
mod_noPAT = lmer(BCI ~ No.Worms + (1 | brotherPairID) + (1 | Sex), data = PCA_allpos$metadata)
# R2c conditional R2 value associated with fixed effects plus the random effects.
A = (MuMIn::r.squaredGLMM(PCA_allpos$modSel)[2] -
MuMIn::r.squaredGLMM(mod_noworms)[2])*100
B = (MuMIn::r.squaredGLMM(PCA_allpos$modSel)[2] -
MuMIn::r.squaredGLMM(mod_noPAT)[2])*100
# Set up scatterplot
scatterplot <- ggplot(fullMetadata_OFFS,
aes(x = res_Nbr_methCpG_Nbr_coveredCpG,
y = BCI, fill=trtG1G2)) +
geom_point(pch=21, size =3, alpha = .8) +
guides(color = "none") +
scale_fill_manual(values = colOffs, name = "Treatment",
labels = c("G1 control - G2 control", "G1 control - G2 exposed", "G1 exposed - G2 control", "G1 exposed - G2 exposed")) +
theme(plot.margin = margin()) + theme_bw() +
theme(legend.position = "none") +
xlab("Methylation residuals (methylated sites/coverage")+
ylab("Body Condition Index")
# Define marginal histogram
marginal_distribution <- function(x, var, group) {
ggplot(x, aes_string(x = var, fill = group)) +
# geom_histogram(bins = 30, alpha = 0.4, position = "identity") +
geom_density(alpha = 0.6, size = 0.2) +
guides(fill = "none") +
scale_fill_manual(values = colOffs) +
theme_void() +
theme(plot.margin = margin())
}
# Set up marginal histograms
x_hist <- marginal_distribution(fullMetadata_OFFS, "res_Nbr_methCpG_Nbr_coveredCpG", "trtG1G2")
y_hist <- marginal_distribution(fullMetadata_OFFS, "BCI", "trtG1G2") +
coord_flip()
# Align histograms with scatterplot
aligned_x_hist <- align_plots(x_hist, scatterplot, align = "v")[[1]]
aligned_y_hist <- align_plots(y_hist, scatterplot, align = "h")[[1]]
# Arrange plots
cowplot::plot_grid(
aligned_x_hist, NULL, scatterplot, aligned_y_hist, ncol = 2, nrow = 2, rel_heights = c(0.2, 1), rel_widths = c(1, 0.2)
)
Methylation profiles, CpG present in all fish
Methylation profile for the 55530 CpG sites covered in all G1 & G2 fish (N = 135:
makePrettyMethCluster(uniteCovALL_woSexAndUnknowChr, fullMetadata,
my.cols.trt=c("#333333ff","#ff0000ff","#ffe680ff","#ff6600ff","#aaccffff","#aa00d4ff"),
my.cols.fam = c(1:4), nbrk = 8)
Methylation profile for the 78384 CpG sites covered in all G2 fish (N = 111:
makePrettyMethCluster(uniteCovALL_G2_woSexAndUnknowChr, fullMetadata_OFFS,
my.cols.trt=c("#ffe680ff","#ff6600ff", "#aaccffff", "#aa00d4ff"),
my.cols.fam = c(1:4), nbrk = 8)
# Save
pdf(file = "../../dataOut/clusterALLCpG_offspings.pdf", width = 10, height = 4)
makePrettyMethCluster(uniteCovALL_G2_woSexAndUnknowChr, fullMetadata_OFFS,
my.cols.trt=c("#ffe680ff","#ff6600ff", "#aaccffff", "#aa00d4ff"),
my.cols.fam = c(1:4), nbrk = 8)
dev.off()
## png
## 2
Permutational multivariate analysis of variance (PERMANOVA) is a non-parametric multivariate statistical test. It is used to compare groups of objects and test the null hypothesis that the centroids and dispersion of the groups as defined by measure space are equivalent for all groups.
# make distance matrix with B-C distances
data.dist = makeDatadistFUN(uniteCovALL_G2_woSexAndUnknowChr)
## Adonis test: importance of each predictor
adonis2(data.dist ~ PAT * outcome * Sex * brotherPairID, data = fullMetadata_OFFS)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = data.dist ~ PAT * outcome * Sex * brotherPairID, data = fullMetadata_OFFS)
## Df SumOfSqs R2 F Pr(>F)
## PAT 1 0.002782 0.01470 1.8292 0.002 **
## outcome 1 0.001909 0.01009 1.2552 0.022 *
## Sex 1 0.002418 0.01277 1.5895 0.001 ***
## brotherPairID 7 0.028018 0.14805 2.6316 0.001 ***
## PAT:outcome 1 0.001680 0.00888 1.1045 0.137
## PAT:Sex 1 0.001492 0.00789 0.9813 0.503
## outcome:Sex 1 0.001557 0.00823 1.0240 0.325
## PAT:brotherPairID 7 0.014344 0.07580 1.3473 0.001 ***
## outcome:brotherPairID 7 0.010591 0.05596 0.9947 0.518
## Sex:brotherPairID 7 0.010394 0.05492 0.9763 0.649
## PAT:outcome:Sex 1 0.001453 0.00768 0.9555 0.610
## PAT:outcome:brotherPairID 7 0.010351 0.05470 0.9722 0.728
## PAT:Sex:brotherPairID 7 0.010249 0.05415 0.9626 0.739
## outcome:Sex:brotherPairID 6 0.008749 0.04623 0.9587 0.743
## PAT:outcome:Sex:brotherPairID 1 0.001127 0.00596 0.7413 0.998
## Residual 54 0.082132 0.43399
## Total 110 0.189247 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Results: family of the father (brotherPairID) explains more than 14% of the variance in methylation.
To focus on G1 and G2 treatments, we define the permutation structure considering brother pairs (N = 8), and use a PERMANOVA to test the hypothesis that paternal treatment, offspring treatment and their interactions significantly influencing global methylation.
perm <- how(nperm = 1000) # 1000 permutations
setBlocks(perm) <- with(fullMetadata_OFFS, brotherPairID) # define the permutation structure considering brotherPairID and sex
print(adonis2(data.dist ~ PAT * outcome * Sex, data = fullMetadata_OFFS, permutations = perm))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: with(fullMetadata_OFFS, brotherPairID)
## Permutation: free
## Number of permutations: 1000
##
## adonis2(formula = data.dist ~ PAT * outcome * Sex, data = fullMetadata_OFFS, permutations = perm)
## Df SumOfSqs R2 F Pr(>F)
## PAT 1 0.002782 0.01470 1.6344 0.000999 ***
## outcome 1 0.001909 0.01009 1.1216 0.034965 *
## Sex 1 0.002418 0.01277 1.4203 0.005994 **
## PAT:outcome 1 0.001716 0.00907 1.0080 0.089910 .
## PAT:Sex 1 0.001740 0.00920 1.0224 0.207792
## outcome:Sex 1 0.001703 0.00900 1.0004 0.316683
## PAT:outcome:Sex 1 0.001653 0.00874 0.9712 0.307692
## Residual 103 0.175326 0.92644
## Total 110 0.189247 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Results:
#### RUN Goodness of fit
myGOF.NMDS.FUN(dataset = uniteCovALL_G2_woSexAndUnknowChr)
Goodness of fit for NMDS suggested the presence of six dimensions with a stress value > 0.1 and 2 with > 0.2
## to find the seed that allows convergence:
# sapply(3:10, function(x) myNMDS(dataset = uniteCovALL_G2_woSexAndUnknowChr, metadata = fullMetadata_OFFS, myseed = x))
NMDSanalysis <- myNMDSFUN(dataset = uniteCovALL_G2_woSexAndUnknowChr, metadata = fullMetadata_OFFS, myseed = 4)
NMDSanalysis$NMDSplot
# Save
pdf(file = "../../dataOut/NMDSplot_allG2.pdf", width = 9, height = 11)
NMDSanalysis$NMDSplot
dev.off()
## png
## 2
AdonisWithinG1trtFUN(trtgp = c(2,3))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: with(fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% trtgp,
## Permutation: free
## Number of permutations: 1000
##
## adonis2(formula = data.dist ~ outcome + Sex + brotherPairID, data = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% trtgp, ], permutations = perm)
## Model: adonis0(formula = ~outcome + Sex + brotherPairID, data = data, method = method)
## Df SumOfSqs R2 F Pr(>F)
## outcome 1 0.001475 0.01622 1.0073 0.486513
## Sex 1 0.002094 0.02302 1.4301 0.000999 ***
## brotherPairID 7 0.020026 0.22018 1.9537 0.001998 **
## Residual 46 0.067357 0.74058
## Total 55 0.090952 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Results:
NMDSanalysis_G1control <- myNMDSFUN(dataset = uniteCovALL_G2_woSexAndUnknowChr,
metadata = fullMetadata_OFFS, myseed = 25,
byParentTrt=TRUE,
trtgp = c(2,3))
#png(filename = "../../dataOut/NMDSplot_G1fromControlG2.png", width = 900, height = 900)
NMDSanalysis_G1control$NMDSplot
#dev.off()
AdonisWithinG1trtFUN(trtgp = c(5,6))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: with(fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% trtgp,
## Permutation: free
## Number of permutations: 1000
##
## adonis2(formula = data.dist ~ outcome + Sex + brotherPairID, data = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% trtgp, ], permutations = perm)
## Model: adonis0(formula = ~outcome + Sex + brotherPairID, data = data, method = method)
## Df SumOfSqs R2 F Pr(>F)
## outcome 1 0.002160 0.02262 1.4047 0.006993 **
## Sex 1 0.002053 0.02150 1.3349 0.192807
## brotherPairID 7 0.022076 0.23117 2.0507 0.019980 *
## Residual 45 0.069205 0.72470
## Total 54 0.095494 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Results:
NMDSanalysis_G1infected <- myNMDSFUN(dataset = uniteCovALL_G2_woSexAndUnknowChr,
metadata = fullMetadata_OFFS, myseed = 25,
byParentTrt=TRUE,
trtgp = c(5,6))
#png(filename = "../../dataOut/NMDSplot_G1fromInfectedG2.png", width = 900, height = 900)
NMDSanalysis_G1infected$NMDSplot
#dev.off()
Differential methylation by brother pair (sex as covariate):
## All DMS are stored in DMSBPlist by brother pair
## Vector of all 4 comparisons
vecCompa <- c("CC_TC", "CT_TT", "CC_CT", "TC_TT")
vecCompaVerbose <- c("Control offspring in control vs infected parents", "Infected offspring in control vs infected parents", "Control vs infected offspring from control parent", "Control vs infected offspring from infected parent") # this is useful in plots
## Extract DMS for all 4 comparisons (by position)
myPosList = lapply(DMSBPlist, lapply, function(x){paste(x$chr, x$end)})
## Subselect those DMS present in at least 4 out of 8 BP
get2keep = function(Compa, NBP = 4){
x <- lapply(myPosList, function(x){unlist(x[[paste0("DMS_15pc_BP_", Compa)]])})
f <- table(unlist((x))) # each DMS present between 1 and 8 times
tokeep <- names(f)[f >= NBP]
# print(length(tokeep))
## Keep the DMS present in 4 families minimum
DMSBPlist_INTER4 <- lapply(x, function(x){x[x %in% tokeep]})
## Reorder by family:
DMSBPlist_INTER4 <- DMSBPlist_INTER4[names(DMSBPlist_INTER4)[order(names(DMSBPlist_INTER4))]]
return(DMSBPlist_INTER4)
}
## Prepare df for complexUpset
getUpsetDF = function(i, NBP){ # for a given comparison
A = get2keep(vecCompa[i], NBP)
A2 = lapply(A, function(x){
x = data.frame(x) # vector of DMS as df
names(x) = "DMS" # name each CpG
return(x)
})
## Add BP name
for (i in 1:length(names(A2))){
A2[[i]]["BP"] = names(A2)[i]
}
# make a dataframe
A2 = A2 %>% reduce(full_join, by = "DMS")
# names column with BP id
for (i in 2:ncol(A2)) {names(A2)[i] = unique(A2[!is.na(A2[i]), i])}
# replace by 0 or 1 the DMS absence/presence
A = data.frame(apply(A2[2:9], 2, function(x) ifelse(is.na(x), 0, 1)))
# add DMS
A$DMS = A2$DMS
return(A)
}
Make upset plots for DMS found in at least 6 BP:
for (i in 1:4){
df = getUpsetDF(i, NBP = 6)
print(ComplexUpset::upset(
df,
names(df)[1:8],
width_ratio=0.1,
themes=upset_default_themes(text=element_text(size=15)),
sort_intersections_by=c('degree', 'cardinality'),
queries=query_by_degree(
df, names(df)[1:8],
params_by_degree=list(
'1'=list(color='red', fill='red'),
'2'=list(color='purple', fill='purple'),
'3'=list(color='blue', fill='blue'),
'4'=list(color='grey', fill='grey'),
'5'=list(color='red', fill='red'),
'6'=list(color='purple', fill='purple'),
'7'=list(color='blue', fill='blue'),
'8'=list(color='green', fill='green')
),
only_components=c("intersections_matrix", "Intersection size")
)) + ggtitle(paste0("Differentially methylated sites found in more than six brother pairs in the comparison: \n", vecCompaVerbose[i]))) #+ theme(plot.title = element_text(size = 30)))
}
PARENTAL effect: DMS found in either CC-TC or CT-TT comparisons OFFSPRING effect: DMS found in either CC-CT or TC-TT comparisons INTERACTION effects: DMS found in CC-CT which show a differential methylation (not necessarily significant) in the opposite direction in TC-TT, or inversely
## Creates a data frame with BP, DMS, effect and comparison, when the DMS is found in 4 BP or more
get2keepFULLdfBP = function(Compa, NBP = 4, myeffect = "Paternal"){
x <- lapply(myPosList, function(x){unlist(x[[paste0("DMS_15pc_BP_", Compa)]])})
mylist = list() # empty list
for (i in 1:length(names(x))){
a=data.frame(BP=names(x)[i], DMS=x[[names(x)[i]]])
mylist[[i]] = a
names(mylist)[i] = names(x)[i]
} # fill full list with df containing BP and DMS
mydf = Reduce(function(...) merge(..., all=T), mylist) # concatenate in a df
# Add information
mydf$globalEffect = myeffect
mydf$comparison = Compa
# Keep the DMS present in 4 families minimum:
tokeep = names(table(mydf$DMS)[table(mydf$DMS)>=NBP])
mydf = mydf[mydf$DMS %in% tokeep,]
return(mydf)
}
# PARENTAL effect: DMS found in either CC-TC or CT-TT comparisons
df_PaternalEffect_4BPmin = unique(rbind(get2keepFULLdfBP(Compa = vecCompa[1], NBP = 4, myeffect = "G1all"),
get2keepFULLdfBP(Compa = vecCompa[2], NBP = 4, myeffect = "G1all")))
DMS_PaternalEffect_4BPmin = unique(df_PaternalEffect_4BPmin$DMS)
# OFFSPRING effect: DMS found in either CC-CT or TC-TT comparisons
df_OffspringEffect_4BPmin = unique(rbind(get2keepFULLdfBP(Compa = vecCompa[3], NBP = 4, myeffect = "G2all"),
get2keepFULLdfBP(Compa = vecCompa[4], NBP = 4, myeffect = "G2all")))
DMS_OffspringEffect_4BPmin = unique(df_OffspringEffect_4BPmin$DMS)
##########
## OVERLAP=DMS found in G1 & G2
# case 1 -> INTERACTION effects DMS found in CC-CT which show a differential methylation in the opposite direction in TC-TT, or inversely (reaction norm are inversed) in most of the brother pairs (5 or more)
# case 2 -> ADDITIVE effect: no slope inversion (4 or more)
# Get the raw methylation from the DMS which are in both Paternal and Offspring effects
subUnite = methylKit::select(uniteCov14_G2_woSexAndUnknowChrOVERLAP,
which(paste(uniteCov14_G2_woSexAndUnknowChrOVERLAP$chr, uniteCov14_G2_woSexAndUnknowChrOVERLAP$end) %in%
intersect(DMS_OffspringEffect_4BPmin, DMS_PaternalEffect_4BPmin)))
# Get mean methylation per brother pair, per treatment:
getMeanMeth <- function(subUnite, BP, mytrt){
metadata = fullMetadata_OFFS[fullMetadata_OFFS$brotherPairID %in% BP & fullMetadata_OFFS$trtG1G2 %in% mytrt, ]
myuniteCov = reorganize(methylObj = subUnite, treatment = metadata$trtG1G2_NUM, sample.ids = metadata$ID)
## remove bases where NO fish in this BP has a coverage
myuniteCov = methylKit::select(myuniteCov, which(!is.na(rowSums(percMethylation(myuniteCov)))))
# calculate mean methylation
df = data.frame(DMS = paste(myuniteCov$chr, myuniteCov$end), meanMeth = rowMeans(percMethylation(myuniteCov)), trt = mytrt, BP = BP)
return(df)
}
# We will apply the following function to all BP and all trt:
vecBP <- unique(fullMetadata_OFFS$brotherPairID)
vectrt <- unique(fullMetadata_OFFS$trtG1G2)
## Loop over all BP & trt
df = data.frame(DMS=NULL, meanMeth=NULL, trt=NULL, BP=NULL) # empty df
for (i in 1:length(vecBP)){
for (j in 1:length(vectrt)){
subdf = getMeanMeth(subUnite = subUnite, BP = vecBP[[i]], mytrt = vectrt[[j]])
df = rbind(df, subdf)
}
}
## Add G1 & G2 trt
df$G1trt = ifelse(df$trt %in% c("NE_control", "NE_exposed"), "control", "infected")
df$G2trt = ifelse(df$trt %in% c("NE_control", "E_control"), "control", "infected")
## cut by G1 trt & merge
dfcp = df[df$G1trt %in% "control" ,]
dfcpco = dfcp[dfcp$G2trt %in% "control",]; dfcpio = dfcp[dfcp$G2trt %in% "infected",]
dfcp = merge(dfcpco, dfcpio, by = c("DMS", "BP")) %>%
mutate(meanDiffMeth=meanMeth.y - meanMeth.x) %>% dplyr::select(c("DMS", "BP", "meanDiffMeth"))
dfip = df[df$G1trt %in% "infected",]
dfipco = dfip[dfip$G2trt %in% "control",]; dfipio = dfip[dfip$G2trt %in% "infected",]
dfip = merge(dfipco, dfipio, by = c("DMS", "BP")) %>%
mutate(meanDiffMeth=meanMeth.y - meanMeth.x) %>% dplyr::select(c("DMS", "BP", "meanDiffMeth"))
df2=merge(dfcp,dfip, by=c("DMS", "BP"))
## interaction if slope inversion/additive if not
df2$inversionSlopeReactionNorms = FALSE
df2[!sign(df2$meanDiffMeth.x) == sign(df2$meanDiffMeth.y),"inversionSlopeReactionNorms"] = TRUE
names(df2)[names(df2) %in% "meanDiffMeth.x"] = "meanDiffMeth.controlG1"
names(df2)[names(df2) %in% "meanDiffMeth.y"] = "meanDiffMeth.infectedG1"
####################
# Get a vector of DMS for each category:
## A DMS is "interaction" if there are more often slope inversion than not
DMS_G1G2interactionEffect_4BPmin = df2 %>% group_by(DMS) %>% dplyr::summarise(count=n(), inversionSlopeRate=sum(inversionSlopeReactionNorms)/count,
effect=ifelse(inversionSlopeRate>0.5, "interaction", "additive")) %>%
dplyr::filter(effect=="interaction") %>% dplyr::select(DMS) %>% unlist()
DMS_G1G2additiveEffect_4BPmin = df2 %>% group_by(DMS) %>% dplyr::summarise(count=n(), inversionSlopeRate=sum(inversionSlopeReactionNorms)/count,
effect=ifelse(inversionSlopeRate>0.5, "interaction", "additive")) %>%
dplyr::filter(effect=="additive") %>% dplyr::select(DMS) %>% unlist()
DMS_G1onlyEffect_4BPmin = DMS_PaternalEffect_4BPmin[!DMS_PaternalEffect_4BPmin %in% c(DMS_G1G2interactionEffect_4BPmin, DMS_G1G2additiveEffect_4BPmin)]
DMS_G2onlyEffect_4BPmin = DMS_OffspringEffect_4BPmin[!DMS_OffspringEffect_4BPmin %in% c(DMS_G1G2interactionEffect_4BPmin, DMS_G1G2additiveEffect_4BPmin)]
####################
# Make a BIG df with all DMS, effects and BP (this time, the effects are DMS-BP specific, not global)
df_effects_full = merge(unique(df_PaternalEffect_4BPmin[c("BP", "DMS","globalEffect")]),
unique(df_OffspringEffect_4BPmin[c("BP", "DMS","globalEffect")]), by=c("BP", "DMS"), all=T)
df_effects_full = merge(df_effects_full, df2, all=T)
df_effects_full$effectBPlevel[df_effects_full$globalEffect.x == "G1all"] = "G1"
df_effects_full$effectBPlevel[df_effects_full$globalEffect.y == "G2all"] = "G2"
df_effects_full$effectBPlevel[df_effects_full$inversionSlopeReactionNorms == TRUE &
df_effects_full$globalEffect.x == "G1all" & df_effects_full$globalEffect.y == "G2all"] = "inter"
df_effects_full$effectBPlevel[df_effects_full$inversionSlopeReactionNorms == FALSE &
df_effects_full$globalEffect.x == "G1all" & df_effects_full$globalEffect.y == "G2all"]= "addit"
#rm junk
rm(subUnite, df, dfcp, dfcpco, dfcpio, dfip, dfipco, dfipio, df2)
# Plot a Venn diagram
ggVennDiagram(list("Paternal effect" = DMS_PaternalEffect_4BPmin, "Offspring effect" = DMS_OffspringEffect_4BPmin, "InteractionEffects" = DMS_G1G2interactionEffect_4BPmin),
label_alpha = 0) + scale_color_manual(values = c(1,1,1))+
scale_fill_gradient(low="white",high = "yellow") + theme(legend.position = "none")
# Save:
pdf(file = "../../dataOut/DMS3groupsVenn.pdf", width = 7, height = 6)
ggVennDiagram(list("Paternal effect" = DMS_PaternalEffect_4BPmin, "Offspring effect" = DMS_OffspringEffect_4BPmin, "InteractionEffects" = DMS_G1G2interactionEffect_4BPmin),
label_alpha = 0) + scale_color_manual(values = c(1,1,1))+
scale_fill_gradient(low="white",high = "yellow") + theme(legend.position = "none")
dev.off()
## png
## 2
The size of the different sections of the Venn diagram are as follows:
DMS_G1onlyEffect_4BPmin_ANNOT = myHomebrewDMSannotation(DMSvec = DMS_G1onlyEffect_4BPmin,
myannotBed12 = annotBed12, myannotGff3 = annotGff3)
## [1] "check that these features are identical:"
## [1] "gasAcul16628-RA" "gasAcul16627-RA"
## [1] "check that these features are identical:"
## [1] "gasAcul15294-RA" "gasAcul15295-RA"
## [1] "check that these features are identical:"
## [1] "gasAcul19985-RA" "gasAcul19984-RA"
DMS_G1onlyEffect_4BPmin_ANNOT=DMS_G1onlyEffect_4BPmin_ANNOT %>% mutate(effect = "G1")
# "check that these features are identical:"
# "gasAcul16628-RA" "gasAcul16627-RA" -> overlapping: Protein of unknown function & Tp63
# "gasAcul15294-RA" "gasAcul15295-RA" -> overlapping: Cdh23 & Vsir (immune!)
# "gasAcul19985-RA" "gasAcul19984-RA" -> overlapping: ST3GAL1 & ST3GAL1, all good for this one
DMS_G2onlyEffect_4BPmin_ANNOT = myHomebrewDMSannotation(DMSvec = DMS_G2onlyEffect_4BPmin,
myannotBed12 = annotBed12, myannotGff3 = annotGff3)
DMS_G2onlyEffect_4BPmin_ANNOT=DMS_G2onlyEffect_4BPmin_ANNOT %>% mutate(effect = "G2")
DMS_G1G2additiveEffect_4BPmin_ANNOT = myHomebrewDMSannotation(DMSvec = DMS_G1G2additiveEffect_4BPmin,
myannotBed12 = annotBed12, myannotGff3 = annotGff3)
DMS_G1G2additiveEffect_4BPmin_ANNOT=DMS_G1G2additiveEffect_4BPmin_ANNOT %>% mutate(effect = "addit")
DMS_G1G2interactionEffect_4BPmin_ANNOT = myHomebrewDMSannotation(DMSvec = DMS_G1G2interactionEffect_4BPmin,
myannotBed12 = annotBed12, myannotGff3 = annotGff3)
## [1] "check that these features are identical:"
## [1] "gasAcul04256-RA" "gasAcul04255-RA"
## [1] "check that these features are identical:"
## [1] "gasAcul04256-RA" "gasAcul04255-RA"
DMS_G1G2interactionEffect_4BPmin_ANNOT=DMS_G1G2interactionEffect_4BPmin_ANNOT%>% mutate(effect = "inter")
# "check that these features are identical:"
# "gasAcul04256-RA" "gasAcul04255-RA" -> overlapping: Protein if unknown function & Proteolipid protein DM beta
# Plot a Venn diagram to see genes in common
pdf(file = "../../dataOut/DMSgroupsVenn_geneLevel.pdf", width = 7, height = 6)
ggVennDiagram(list("G1" = DMS_G1onlyEffect_4BPmin_ANNOT$feature.name, "G2" = DMS_G2onlyEffect_4BPmin_ANNOT$feature.name,
"addit" = DMS_G1G2additiveEffect_4BPmin_ANNOT$feature.name, "inter" = DMS_G1G2interactionEffect_4BPmin_ANNOT$feature.name),
label_alpha = 0) + scale_color_manual(values = c(1,1,1,1))+
scale_fill_gradient(low="white",high = "yellow") + theme(legend.position = "none") + ggtitle("Genes in each effect")
dev.off()
## png
## 2
## NB some genes have DMS in different effects!
# A gene has DMSs in the 4 effects!
geneAll4 = intersect(intersect(intersect(DMS_G1onlyEffect_4BPmin_ANNOT$feature.name, DMS_G2onlyEffect_4BPmin_ANNOT$feature.name),
DMS_G1G2additiveEffect_4BPmin_ANNOT$feature.name), DMS_G1G2interactionEffect_4BPmin_ANNOT$feature.name)
DMS_G1onlyEffect_4BPmin_ANNOT[DMS_G1onlyEffect_4BPmin_ANNOT$feature.name %in% geneAll4,]
## GeneSymbol feature.name chrom start end width strand.x
## 1 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054992 14054992 1 *
## 2 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054930 14054930 1 *
## 3 FKBP3 gasAcul22163-RA Gy_chrXVIII 14053519 14053519 1 *
## 4 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054980 14054980 1 *
## 5 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054965 14054965 1 *
## 6 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054983 14054983 1 *
## 7 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054932 14054932 1 *
## 8 FKBP3 gasAcul22163-RA Gy_chrXVIII 14053497 14053497 1 *
## 631 DPP6 gasAcul22719-RA Gy_chrXXI 13299709 13299709 1 *
## 632 DPP6 gasAcul22719-RA Gy_chrXXI 13299666 13299666 1 *
## 633 DPP6 gasAcul22719-RA Gy_chrXXI 13299696 13299696 1 *
## DMS featureType
## 1 Gy_chrXVIII 14054992 intergenic
## 2 Gy_chrXVIII 14054930 intergenic
## 3 Gy_chrXVIII 14053519 intergenic
## 4 Gy_chrXVIII 14054980 intergenic
## 5 Gy_chrXVIII 14054965 intergenic
## 6 Gy_chrXVIII 14054983 intergenic
## 7 Gy_chrXVIII 14054932 intergenic
## 8 Gy_chrXVIII 14053497 intergenic
## 631 Gy_chrXXI 13299709 intron
## 632 Gy_chrXXI 13299666 intron
## 633 Gy_chrXXI 13299696 intron
## Note
## 1 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 2 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 3 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 4 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 5 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 6 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 7 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 8 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 631 Similar to Dpp6: Dipeptidyl aminopeptidase-like protein 6 (Rattus norvegicus OX=10116)
## 632 Similar to Dpp6: Dipeptidyl aminopeptidase-like protein 6 (Rattus norvegicus OX=10116)
## 633 Similar to Dpp6: Dipeptidyl aminopeptidase-like protein 6 (Rattus norvegicus OX=10116)
## Ontology_term start.gene end.gene strand.y Parent
## 1 GO:0003755 14048181 14049908 - gasAcul22163
## 2 GO:0003755 14048181 14049908 - gasAcul22163
## 3 GO:0003755 14048181 14049908 - gasAcul22163
## 4 GO:0003755 14048181 14049908 - gasAcul22163
## 5 GO:0003755 14048181 14049908 - gasAcul22163
## 6 GO:0003755 14048181 14049908 - gasAcul22163
## 7 GO:0003755 14048181 14049908 - gasAcul22163
## 8 GO:0003755 14048181 14049908 - gasAcul22163
## 631 GO:0006508, GO:0008236 13289971 13323117 + gasAcul22719
## 632 GO:0006508, GO:0008236 13289971 13323117 + gasAcul22719
## 633 GO:0006508, GO:0008236 13289971 13323117 + gasAcul22719
## nDMSperGene geneLengthkb nDMSperGenekb ENTREZID description
## 1 8 1.727 4.63 2287 FKBP prolyl isomerase 3
## 2 8 1.727 4.63 2287 FKBP prolyl isomerase 3
## 3 8 1.727 4.63 2287 FKBP prolyl isomerase 3
## 4 8 1.727 4.63 2287 FKBP prolyl isomerase 3
## 5 8 1.727 4.63 2287 FKBP prolyl isomerase 3
## 6 8 1.727 4.63 2287 FKBP prolyl isomerase 3
## 7 8 1.727 4.63 2287 FKBP prolyl isomerase 3
## 8 8 1.727 4.63 2287 FKBP prolyl isomerase 3
## 631 3 33.146 0.09 1804 dipeptidyl peptidase like 6
## 632 3 33.146 0.09 1804 dipeptidyl peptidase like 6
## 633 3 33.146 0.09 1804 dipeptidyl peptidase like 6
## summary
## 1 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 2 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 3 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 4 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 5 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 6 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 7 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 8 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 631 This gene encodes a single-pass type II membrane protein that is a member of the peptidase S9B family of serine proteases. This protein has no detectable protease activity, most likely due to the absence of the conserved serine residue normally present in the catalytic domain of serine proteases. However, it does bind specific voltage-gated potassium channels and alters their expression and biophysical properties. Variations in this gene may be associated with susceptibility to amyotrophic lateral sclerosis and with idiopathic ventricular fibrillation. Alternative splicing results in multiple transcript variants. [provided by RefSeq, Mar 2014]
## 632 This gene encodes a single-pass type II membrane protein that is a member of the peptidase S9B family of serine proteases. This protein has no detectable protease activity, most likely due to the absence of the conserved serine residue normally present in the catalytic domain of serine proteases. However, it does bind specific voltage-gated potassium channels and alters their expression and biophysical properties. Variations in this gene may be associated with susceptibility to amyotrophic lateral sclerosis and with idiopathic ventricular fibrillation. Alternative splicing results in multiple transcript variants. [provided by RefSeq, Mar 2014]
## 633 This gene encodes a single-pass type II membrane protein that is a member of the peptidase S9B family of serine proteases. This protein has no detectable protease activity, most likely due to the absence of the conserved serine residue normally present in the catalytic domain of serine proteases. However, it does bind specific voltage-gated potassium channels and alters their expression and biophysical properties. Variations in this gene may be associated with susceptibility to amyotrophic lateral sclerosis and with idiopathic ventricular fibrillation. Alternative splicing results in multiple transcript variants. [provided by RefSeq, Mar 2014]
## effect
## 1 G1
## 2 G1
## 3 G1
## 4 G1
## 5 G1
## 6 G1
## 7 G1
## 8 G1
## 631 G1
## 632 G1
## 633 G1
## FKBP3 & DPP6
## Make a summary table with genes and to which effect they belong:
allDMSAnnot = rbind(DMS_G1onlyEffect_4BPmin_ANNOT,
DMS_G2onlyEffect_4BPmin_ANNOT,
DMS_G1G2additiveEffect_4BPmin_ANNOT,
DMS_G1G2interactionEffect_4BPmin_ANNOT)
# add summary in select
allDMSAnnot = allDMSAnnot %>%
dplyr::select(GeneSymbol, feature.name, chrom, start.gene, end.gene, geneLengthkb, nDMSperGene, effect, summary) %>%
unique %>% tidyr::spread(key = effect, value = nDMSperGene) %>%
dplyr::mutate(nDMS = rowSums(across(c(G1, G2, 'addit', 'inter')), na.rm = T),
nDMSperGeneKb=round(nDMS/geneLengthkb, 2)) %>%
rowwise() %>% dplyr::mutate(effect = paste0(c("G1", "G2", "addit", "inter")[
!is.na(c_across(all_of(c("G1", "G2", "addit", "inter"))))], collapse = ' ')) %>% # add effect
data.frame() %>%
arrange(desc(nDMSperGeneKb))#arrange in descending order
# reorder columns with summary at the end
allDMSAnnot = allDMSAnnot %>%
dplyr::select(c(names(allDMSAnnot)[!names(allDMSAnnot) %in% "summary"], "summary"))
# Get the first gene for each effect, i.e. the one with the most DMS/kb
allDMSAnnot_top = allDMSAnnot %>% arrange(effect)
allDMSAnnot_top = allDMSAnnot_top[!duplicated(allDMSAnnot_top$effect),]%>% arrange(desc(nDMSperGeneKb))
# Write out
write.csv(allDMSAnnot, file = "../../dataOut/allDMSAnnot.csv", row.names = F)
write.csv(allDMSAnnot_top, file = "../../dataOut/allDMSAnnot_top.csv", row.names = F)
plotGeneTarget <- function(myTargetGene, myannotBed12=annotBed12){
# plotdf
dfplot = rbind(DMS_G1onlyEffect_4BPmin_ANNOT[DMS_G1onlyEffect_4BPmin_ANNOT$GeneSymbol %in% myTargetGene,],
DMS_G2onlyEffect_4BPmin_ANNOT[DMS_G2onlyEffect_4BPmin_ANNOT$GeneSymbol %in% myTargetGene,],
DMS_G1G2additiveEffect_4BPmin_ANNOT[DMS_G1G2additiveEffect_4BPmin_ANNOT$GeneSymbol %in% myTargetGene,],
DMS_G1G2interactionEffect_4BPmin_ANNOT[DMS_G1G2interactionEffect_4BPmin_ANNOT$GeneSymbol %in% myTargetGene,])
# Find TSS position of the gene
dfplot$TSSpos = myannotBed12$TSSes[myannotBed12$TSSes$name %in% dfplot$feature.name]@ranges@start
# Set TSS as origin
dfplot$start_distToTSS = dfplot$start - dfplot$TSSpos
dfplot$end_distToTSS = dfplot$end - dfplot$TSSpos
dfplot$start.gene_distToTSS = dfplot$start.gene - dfplot$TSSpos
dfplot$end.gene_distToTSS = dfplot$end.gene - dfplot$TSSpos
# Reorder effects factor for legend
dfplot$effect <- factor(dfplot$effect, levels = c("G1", "G2", "addit", "inter"))
# Prepare rectangles
mini=min(dfplot$start.gene_distToTSS, dfplot$start_distToTSS)
maxi=max(dfplot$end.gene_distToTSS, dfplot$end_distToTSS)+100
start.gene_distToTSS = unique(dfplot$start.gene_distToTSS)
end.gene_distToTSS = unique(dfplot$end.gene_distToTSS)
# Plot
plotGeneTarget= ggplot(dfplot) +
geom_rect(xmin=mini, xmax=maxi, ymin=0, ymax =.5, fill="#bfb6b6")+
geom_rect(aes(xmin=start.gene_distToTSS, xmax=end.gene_distToTSS, ymin=0, ymax =.5), fill = "black")+
geom_point(aes(x = start_distToTSS, y = .8, col = effect, pch=featureType), size = 5) +
geom_segment(aes(x = start_distToTSS, xend = start_distToTSS, y=0, yend=.8, col = effect)) +
geom_segment(aes(x = 0, xend = 0, y=0, yend=.5), col = "red", size = 3) + # TSS
theme_blank() +
theme(axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank())+
labs(title = paste(unique(dfplot$GeneSymbol), ":", dfplot$description),
subtitle = str_wrap(dfplot$summary, width = 150))+
scale_color_manual(values = c("#E69F00", "#56B4E9", "#009E73", "#CC79A7"))#cb friendly palette
####### And by brother pairs
dfplot_BP = merge(df_effects_full[df_effects_full$DMS %in% dfplot$DMS,c("BP", "DMS", "effectBPlevel")], dfplot)
# Rm potision with no effect in this BP
dfplot_BP=dfplot_BP[!is.na(dfplot_BP$effectBPlevel),]
# Same order of Father's family as in figure 1 (clusters)
dfplot_BP$BP = factor(dfplot_BP$BP, levels = c("BP05", "BP31", "BP04", "BP30", "BP16", "BP34", "BP49","BP46"))
# Plot
plotGeneTargetBP = ggplot(dfplot_BP) +
geom_rect(xmin=mini, xmax=maxi, ymin=0, ymax =.5, fill="#bfb6b6")+
geom_rect(aes(xmin=start.gene_distToTSS, xmax=end.gene_distToTSS, ymin=0, ymax =.5), fill = "black")+
geom_point(aes(x = start_distToTSS, y = .8, col = effectBPlevel, pch=featureType), size = 5) +
geom_segment(aes(x = start_distToTSS, xend = start_distToTSS, y=0, yend=.8, col = effectBPlevel)) +
geom_segment(aes(x = 0, xend = 0, y=0, yend=.5), col = "red", size = 3) + # TSS
theme_blank() +
facet_grid(BP~.)+ theme(panel.spacing = unit(1.5, "lines"))+
theme(axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank())+
scale_y_continuous(expand=expansion(mult=c(0,0.15))) # increase space up
scale_color_manual(values = c("#E69F00", "#56B4E9", "#009E73", "#CC79A7"))#cb friendly palette
return(list(myTargetGene_DMSdf=dfplot, plotGeneTarget=plotGeneTarget, plotGeneTargetBP=plotGeneTargetBP))
}
## All 4 effects:
P = plotGeneTarget(myTargetGene = "FKBP3")
P$myTargetGene_DMSdf
## GeneSymbol feature.name chrom start end width strand.x
## 1 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054992 14054992 1 *
## 2 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054930 14054930 1 *
## 3 FKBP3 gasAcul22163-RA Gy_chrXVIII 14053519 14053519 1 *
## 4 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054980 14054980 1 *
## 5 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054965 14054965 1 *
## 6 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054983 14054983 1 *
## 7 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054932 14054932 1 *
## 8 FKBP3 gasAcul22163-RA Gy_chrXVIII 14053497 14053497 1 *
## 9 FKBP3 gasAcul22163-RA Gy_chrXVIII 14053503 14053503 1 *
## 10 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054800 14054800 1 *
## 11 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054854 14054854 1 *
## 12 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054831 14054831 1 *
## 13 FKBP3 gasAcul22163-RA Gy_chrXVIII 14053506 14053506 1 *
## 21 FKBP3 gasAcul22163-RA Gy_chrXVIII 14053508 14053508 1 *
## 31 FKBP3 gasAcul22163-RA Gy_chrXVIII 14053521 14053521 1 *
## 41 FKBP3 gasAcul22163-RA Gy_chrXVIII 14053542 14053542 1 *
## 51 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054809 14054809 1 *
## 61 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054878 14054878 1 *
## 71 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054885 14054885 1 *
## 81 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054887 14054887 1 *
## 14 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054874 14054874 1 *
## 22 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054881 14054881 1 *
## 32 FKBP3 gasAcul22163-RA Gy_chrXVIII 14054876 14054876 1 *
## DMS featureType
## 1 Gy_chrXVIII 14054992 intergenic
## 2 Gy_chrXVIII 14054930 intergenic
## 3 Gy_chrXVIII 14053519 intergenic
## 4 Gy_chrXVIII 14054980 intergenic
## 5 Gy_chrXVIII 14054965 intergenic
## 6 Gy_chrXVIII 14054983 intergenic
## 7 Gy_chrXVIII 14054932 intergenic
## 8 Gy_chrXVIII 14053497 intergenic
## 9 Gy_chrXVIII 14053503 intergenic
## 10 Gy_chrXVIII 14054800 intergenic
## 11 Gy_chrXVIII 14054854 intergenic
## 12 Gy_chrXVIII 14054831 intergenic
## 13 Gy_chrXVIII 14053506 intergenic
## 21 Gy_chrXVIII 14053508 intergenic
## 31 Gy_chrXVIII 14053521 intergenic
## 41 Gy_chrXVIII 14053542 intergenic
## 51 Gy_chrXVIII 14054809 intergenic
## 61 Gy_chrXVIII 14054878 intergenic
## 71 Gy_chrXVIII 14054885 intergenic
## 81 Gy_chrXVIII 14054887 intergenic
## 14 Gy_chrXVIII 14054874 intergenic
## 22 Gy_chrXVIII 14054881 intergenic
## 32 Gy_chrXVIII 14054876 intergenic
## Note
## 1 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 2 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 3 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 4 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 5 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 6 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 7 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 8 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 9 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 10 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 11 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 12 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 13 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 21 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 31 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 41 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 51 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 61 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 71 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 81 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 14 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 22 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## 32 Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913)
## Ontology_term start.gene end.gene strand.y Parent nDMSperGene
## 1 GO:0003755 14048181 14049908 - gasAcul22163 8
## 2 GO:0003755 14048181 14049908 - gasAcul22163 8
## 3 GO:0003755 14048181 14049908 - gasAcul22163 8
## 4 GO:0003755 14048181 14049908 - gasAcul22163 8
## 5 GO:0003755 14048181 14049908 - gasAcul22163 8
## 6 GO:0003755 14048181 14049908 - gasAcul22163 8
## 7 GO:0003755 14048181 14049908 - gasAcul22163 8
## 8 GO:0003755 14048181 14049908 - gasAcul22163 8
## 9 GO:0003755 14048181 14049908 - gasAcul22163 4
## 10 GO:0003755 14048181 14049908 - gasAcul22163 4
## 11 GO:0003755 14048181 14049908 - gasAcul22163 4
## 12 GO:0003755 14048181 14049908 - gasAcul22163 4
## 13 GO:0003755 14048181 14049908 - gasAcul22163 8
## 21 GO:0003755 14048181 14049908 - gasAcul22163 8
## 31 GO:0003755 14048181 14049908 - gasAcul22163 8
## 41 GO:0003755 14048181 14049908 - gasAcul22163 8
## 51 GO:0003755 14048181 14049908 - gasAcul22163 8
## 61 GO:0003755 14048181 14049908 - gasAcul22163 8
## 71 GO:0003755 14048181 14049908 - gasAcul22163 8
## 81 GO:0003755 14048181 14049908 - gasAcul22163 8
## 14 GO:0003755 14048181 14049908 - gasAcul22163 3
## 22 GO:0003755 14048181 14049908 - gasAcul22163 3
## 32 GO:0003755 14048181 14049908 - gasAcul22163 3
## geneLengthkb nDMSperGenekb ENTREZID description
## 1 1.727 4.63 2287 FKBP prolyl isomerase 3
## 2 1.727 4.63 2287 FKBP prolyl isomerase 3
## 3 1.727 4.63 2287 FKBP prolyl isomerase 3
## 4 1.727 4.63 2287 FKBP prolyl isomerase 3
## 5 1.727 4.63 2287 FKBP prolyl isomerase 3
## 6 1.727 4.63 2287 FKBP prolyl isomerase 3
## 7 1.727 4.63 2287 FKBP prolyl isomerase 3
## 8 1.727 4.63 2287 FKBP prolyl isomerase 3
## 9 1.727 2.32 2287 FKBP prolyl isomerase 3
## 10 1.727 2.32 2287 FKBP prolyl isomerase 3
## 11 1.727 2.32 2287 FKBP prolyl isomerase 3
## 12 1.727 2.32 2287 FKBP prolyl isomerase 3
## 13 1.727 4.63 2287 FKBP prolyl isomerase 3
## 21 1.727 4.63 2287 FKBP prolyl isomerase 3
## 31 1.727 4.63 2287 FKBP prolyl isomerase 3
## 41 1.727 4.63 2287 FKBP prolyl isomerase 3
## 51 1.727 4.63 2287 FKBP prolyl isomerase 3
## 61 1.727 4.63 2287 FKBP prolyl isomerase 3
## 71 1.727 4.63 2287 FKBP prolyl isomerase 3
## 81 1.727 4.63 2287 FKBP prolyl isomerase 3
## 14 1.727 1.74 2287 FKBP prolyl isomerase 3
## 22 1.727 1.74 2287 FKBP prolyl isomerase 3
## 32 1.727 1.74 2287 FKBP prolyl isomerase 3
## summary
## 1 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 2 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 3 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 4 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 5 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 6 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 7 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 8 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 9 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 10 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 11 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 12 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 13 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 21 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 31 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 41 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 51 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 61 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 71 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 81 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 14 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 22 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## 32 The protein encoded by this gene is a member of the immunophilin protein family, which play a role in immunoregulation and basic cellular processes involving protein folding and trafficking. This encoded protein is a cis-trans prolyl isomerase that binds the immunosuppressants FK506 and rapamycin, as well as histone deacetylases, the transcription factor YY1, casein kinase II, and nucleolin. It has a higher affinity for rapamycin than for FK506 and thus may be an important target molecule for immunosuppression by rapamycin. [provided by RefSeq, Sep 2008]
## effect TSSpos start_distToTSS end_distToTSS start.gene_distToTSS
## 1 G1 14049908 5084 5084 -1727
## 2 G1 14049908 5022 5022 -1727
## 3 G1 14049908 3611 3611 -1727
## 4 G1 14049908 5072 5072 -1727
## 5 G1 14049908 5057 5057 -1727
## 6 G1 14049908 5075 5075 -1727
## 7 G1 14049908 5024 5024 -1727
## 8 G1 14049908 3589 3589 -1727
## 9 G2 14049908 3595 3595 -1727
## 10 G2 14049908 4892 4892 -1727
## 11 G2 14049908 4946 4946 -1727
## 12 G2 14049908 4923 4923 -1727
## 13 addit 14049908 3598 3598 -1727
## 21 addit 14049908 3600 3600 -1727
## 31 addit 14049908 3613 3613 -1727
## 41 addit 14049908 3634 3634 -1727
## 51 addit 14049908 4901 4901 -1727
## 61 addit 14049908 4970 4970 -1727
## 71 addit 14049908 4977 4977 -1727
## 81 addit 14049908 4979 4979 -1727
## 14 inter 14049908 4966 4966 -1727
## 22 inter 14049908 4973 4973 -1727
## 32 inter 14049908 4968 4968 -1727
## end.gene_distToTSS
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
## 7 0
## 8 0
## 9 0
## 10 0
## 11 0
## 12 0
## 13 0
## 21 0
## 31 0
## 41 0
## 51 0
## 61 0
## 71 0
## 81 0
## 14 0
## 22 0
## 32 0
# Zoom in 1
P1 = P$plotGeneTarget + coord_cartesian(xlim=c(3588,3636)) + theme(legend.position = "none") +
ggtitle("") + labs(subtitle = "")+ theme(plot.background = element_rect(colour = "black", fill=NA, size=1))
# Zoom in 2
P2 = P$plotGeneTarget + coord_cartesian(xlim=c(4890,5060)) + theme(legend.position = "none") +
ggtitle("") + labs(subtitle = "")+ theme(plot.background = element_rect(colour = "black", fill=NA, size=1))
## For all BP, zoomed
Pbp1 = P$plotGeneTargetBP + theme(legend.position = "none") + coord_cartesian(xlim=c(3588,3636))+
theme(plot.background = element_rect(colour = "black", fill=NA, size=1))
Pbp2 = P$plotGeneTargetBP + theme(legend.position = "none") + coord_cartesian(xlim=c(4890,5060))+
theme(plot.background = element_rect(colour = "black", fill=NA, size=1))
fullplotFKBP3 = cowplot::plot_grid(P$plotGeneTarget,
cowplot::plot_grid(P1, P2, nrow = 1, ncol =2),
cowplot::plot_grid(Pbp1, Pbp2, nrow = 1, ncol =2),
nrow = 3, rel_heights = c(1,1,3), labels = c("A", "B", "C"))
fullplotFKBP3
# Plot gene and zooms
pdf("../../dataOut/FKBP3_DMS.pdf", width = 15, height = 15)
fullplotFKBP3
dev.off()
## png
## 2
# create gene universe
gene_universe <- data.frame(
subsetByOverlaps(GRanges(annotGff3), GRanges(uniteCov14_G2_woSexAndUnknowChrOVERLAP))) %>% # subselect covered CpGs
filter(lengths(Ontology_term)!=0) %>% # rm non existing GO terms
filter(type %in% "gene") %>% # keep all the 7416 genes with GO terms
dplyr::select(c("Name", "Ontology_term")) %>%
mutate(go_linkage_type = "IEA") %>% #NB: IEA but not necessarily true, it's from Interproscan after Maker. Sticklebacks (biomart) have 82701 IEA and 63 ISS.
relocate("Ontology_term","go_linkage_type","Name") %>%
unnest(Ontology_term) %>% # one GO per line (was a list before in this column)
data.frame()
# Create gene set collection
goFrame <- GOFrame(gene_universe, organism="Gasterosteus aculeatus")
goAllFrame <- GOAllFrame(goFrame)
gsc_universe <- GeneSetCollection(goAllFrame, setType = GOCollection())
IMPORTANT NOTE from Mel: why conditional hypergeometric test? The GO ontology is set up as a directed acyclic graph, where a parent term is comprised of all its child terms. If you do a standard hypergeometric, you might e.g., find ‘positive regulation of kinase activity’ to be significant. If you then test ‘positive regulation of catalytic activity’, which is a parent term, then it might be significant as well, but only because of the terms coming from positive regulation of kinase activity.
The conditional hypergeometric takes this into account, and only uses those terms that were not already significant when testing a higher order (parent) term.
GO_G1only = makedfGO(DMS_G1onlyEffect_4BPmin_ANNOT, gene_universe, effect = "G1only")
GO_G2only = makedfGO(DMS_G2onlyEffect_4BPmin_ANNOT, gene_universe, effect = "G2only")
GO_G1G2addit = makedfGO(DMS_G1G2additiveEffect_4BPmin_ANNOT, gene_universe, effect = "addit")
GO_G1G2inter = makedfGO(DMS_G1G2interactionEffect_4BPmin_ANNOT, gene_universe, effect = "inter")
dfGO = rbind(GO_G1only, GO_G2only, GO_G1G2addit, GO_G1G2inter)
GOplot <- dfGO %>% ggplot(aes(x=Effect, y = factor(GO.name))) +
geom_point(aes(color = p.value.adjusted, size = genePercent)) +
scale_color_gradient(name="adjusted\np-value", low = "red", high = "blue") +
scale_size_continuous(name = "% of genes")+
theme_bw() + ylab("") + xlab("Treatments comparison") +
theme(legend.box.background = element_rect(fill = "#ebebeb", color = "#ebebeb"),
legend.background = element_rect(fill = "#ebebeb", color = "#ebebeb"),
legend.key = element_rect(fill = "#ebebeb", color = "#ebebeb"), legend.position="left") + # grey box for legend
facet_grid(fct_inorder(GO.category)~., scales="free",space = "free")
GOplot
pdf(GOplot, file = "../../dataOut/GOplot4Venncat.pdf", width = 10, height = 25)
GOplot
dev.off()
## png
## 2
Approach:
For each DMS G1 and/or G2 effects:
Extract methylation values: raw beta values at DMS shared by >4 (or more) BP
PCA
Extract axes 1 & 2
Correlation with parasite load/BCI
If result positive, annotate the CpG associated with significant axis
# Make PCA and model lmer(BCI ~ PCA1*PCA2*No.Worms*PAT + (1|brotherPairID)+ (1|Sex), data=metadata)
RESPCA <- getPCACpG(DMSvec=unique(c(DMS_PaternalEffect_4BPmin, DMS_OffspringEffect_4BPmin)), effect="all effects")
## [1] "2272 DMS linked with all effects"
## [1] "The chosen model is:"
## BCI ~ PCA2 + No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) +
## PCA2:PAT + No.Worms:PAT
## <environment: 0x55df205f9a18>
## [1] "1168 CpG sites most correlated (p < 0.05) with the first principal component"
## [1] "1135 CpG sites most correlated (p < 0.05) with the second principal component"
# 2272 DMS linked with all effects
# [1] "The chosen model is:"
# BCI ~ PCA2 + No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) +
# PCA2:PAT + No.Worms:PAT
# <environment: 0x55fd9c8571e8>
# [1] "1168 CpG sites most correlated (p < 0.05) with the first principal component"
# [1] "1135 CpG sites most correlated (p < 0.05) with the second principal component"
formula(RESPCA$PCA_percAtDMS_imputed$modSel)
## BCI ~ PCA2 + No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) +
## PCA2:PAT + No.Worms:PAT
## <environment: 0x55df205f9a18>
# The SECOND PCA axis is significant in BCI
# [1] "The chosen model is:"
BCI ~ PCA2 + No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) +
PCA2:PAT + No.Worms:PAT
## BCI ~ PCA2 + No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) +
## PCA2:PAT + No.Worms:PAT
### How much of the BCI variance is explained by each variables?
mod_noworms = lmer(BCI ~ PCA2 + PAT + PCA2:PAT + (1 | brotherPairID) + (1 | Sex),
data = RESPCA$PCA_percAtDMS_imputed$metadata)
mod_noPAT = lmer(BCI ~ PCA2 + No.Worms + (1 | brotherPairID) + (1 | Sex),
data = RESPCA$PCA_percAtDMS_imputed$metadata)
mod_noPCA2 = lmer(BCI ~ + No.Worms + PAT + No.Worms:PAT +(1 | brotherPairID) + (1 | Sex),
data = RESPCA$PCA_percAtDMS_imputed$metadata)
# R2c conditional R2 value associated with fixed effects plus the random effects.
A = (MuMIn::r.squaredGLMM(RESPCA$PCA_percAtDMS_imputed$modSel)[2] -
MuMIn::r.squaredGLMM(mod_noworms)[2])*100
B = (MuMIn::r.squaredGLMM(RESPCA$PCA_percAtDMS_imputed$modSel)[2] -
MuMIn::r.squaredGLMM(mod_noPAT)[2])*100
C = (MuMIn::r.squaredGLMM(RESPCA$PCA_percAtDMS_imputed$modSel)[2] -
MuMIn::r.squaredGLMM(mod_noPCA2)[2])*100
round(A, 2) #10.71% of the variance in associated with the parasite load (number of worms)
## [1] 10.71
round(B, 2) #22% of the variance in associated with the paternal treatment
## [1] 22
round(C, 2) #9.47% of the variance in associated with the second PCA axis
## [1] 9.47
### Plot of the model
phenoMethPlot <- plot(ggpredict(RESPCA$PCA_percAtDMS_imputed$modSel, terms = c("No.Worms", "PCA2", "PAT")), add.data = TRUE, alpha = .08) +
theme_bw() +
scale_color_gradient(low = "white", high = "red")+
scale_fill_gradient(low = "white", high = "red") +
ylab("Body Condition Index") + xlab("Number of worms")+
ggtitle("Predicted values of Body Condition Index in offspring")
phenoMethPlot
# save
pdf(file = "../../dataOut/phenotypeMeth/phenoMethPlot_alleffects.pdf", width = 7, height = 5)
phenoMethPlot
dev.off()
## png
## 2
annotPCAaxisFull <- myHomebrewDMSannotation(DMSvec = paste(RESPCA$CpGPCA2$chr, RESPCA$CpGPCA2$end),
myannotBed12 = annotBed12, myannotGff3 = annotGff3)
## [1] "check that these features are identical:"
## [1] "gasAcul16628-RA" "gasAcul16627-RA"
## [1] "check that these features are identical:"
## [1] "gasAcul04256-RA" "gasAcul04255-RA"
## [1] "check that these features are identical:"
## [1] "gasAcul04256-RA" "gasAcul04255-RA"
## [1] "check that these features are identical:"
## [1] "gasAcul15294-RA" "gasAcul15295-RA"
# merge with full table to add effect
annotPCAaxisFull = merge(annotPCAaxisFull, allDMSAnnot)
annotPCAaxis = annotPCAaxisFull %>%
dplyr::select(c("GeneSymbol", "feature.name", "Note", "chrom", "nDMSperGenekb", "ENTREZID", "description", "summary", "effect"))%>%
unique
write.csv(annotPCAaxis, "../../dataOut/annotPCA2_1135DMS_435genes.csv", row.names = F)
P=plotManhattanGenesDMS(annotFile = annotPCAaxisFull, GYgynogff = GYgynogff)
P
pdf("../../dataOut/ManhattanPlots1135DMS435genes.pdf", width = 10, height = 3)
P
dev.off()
## png
## 2
GO_PCA2_1135DMS = makedfGO(annotPCAaxisFull, gene_universe, effect = "PCAaxis2")
GOplot <- GO_PCA2_1135DMS %>% ggplot(aes(x=Effect, y = factor(GO.name))) +
geom_point(aes(color = p.value.adjusted, size = genePercent)) +
scale_color_gradient(name="adjusted\np-value", low = "red", high = "blue") +
scale_size_continuous(name = "% of genes")+
theme_bw() + ylab("") + xlab("") +
theme(legend.box.background = element_rect(fill = "#ebebeb", color = "#ebebeb"),
legend.background = element_rect(fill = "#ebebeb", color = "#ebebeb"),
legend.key = element_rect(fill = "#ebebeb", color = "#ebebeb"), legend.position="left") + # grey box for legend
facet_grid(fct_inorder(GO.category)~., scales="free",space = "free")
GOplot
pdf(GOplot, file = "../../dataOut/GOplotPCA2.pdf", width = 6, height = 7)
GOplot
dev.off()
## png
## 2
# GO of 435 genes linked with the 1135 DMS PCA2, split by effect (NB some genes in different effects; genes with at least 1 CpG in one effect)
# ```{r geneOnto_runGO_1135DMS_axis2_SPLITBYEFFECT}
GO_G1_sub = makedfGO(annotPCAaxisFull[!is.na(annotPCAaxisFull$G1),], gene_universe, effect = "G1")
GO_G2_sub = makedfGO(annotPCAaxisFull[!is.na(annotPCAaxisFull$G2),], gene_universe, effect = "G2")
GO_addit_sub = makedfGO(annotPCAaxisFull[!is.na(annotPCAaxisFull$addit),], gene_universe, effect = "addit")
GO_inter_sub = makedfGO(annotPCAaxisFull[!is.na(annotPCAaxisFull$inter),], gene_universe, effect = "inter")
dfGO_sub = rbind(GO_G1_sub, GO_G2_sub, GO_addit_sub, GO_inter_sub)
### GO plot
GOplot_sub <- dfGO_sub %>% ggplot(aes(x=Effect, y = factor(GO.name))) +
geom_point(aes(color = p.value.adjusted, size = genePercent)) +
scale_color_gradient(name="adjusted\np-value", low = "red", high = "blue") +
scale_size_continuous(name = "% of genes")+
theme_bw() + ylab("") + xlab("Treatments comparison") +
theme(legend.box.background = element_rect(fill = "#ebebeb", color = "#ebebeb"),
legend.background = element_rect(fill = "#ebebeb", color = "#ebebeb"),
legend.key = element_rect(fill = "#ebebeb", color = "#ebebeb"), legend.position="left") + # grey box for legend
facet_grid(fct_inorder(GO.category)~., scales="free",space = "free")
GOplot_sub
pdf(GOplot_sub, file = "../../dataOut/GOplot4Venncat_split.pdf", width = 8, height = 12)
GOplot_sub
dev.off()
## png
## 2
## Features Annotation (use package genomation v1.24.0)
## NB Promoters are defined by options at genomation::readTranscriptFeatures function.
## The default option is to take -1000,+1000bp around the TSS and you can change that.
## -> following Heckwolf 2020 and Sagonas 2020, we consider 1500bp upstream and 500 bp downstream
## Parents comparison:
diffAnn_PAR = annotateWithGeneParts(as(DMSlist$DMS_15pc_G1_C_T,"GRanges"),annotBed12)
## Offspring from control parents comparison:
diffAnn_G2_controlG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_CC_CT,"GRanges"),annotBed12)
## Offspring from infected parents comparison:
diffAnn_G2_infectedG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_TC_TT,"GRanges"),annotBed12)
par(mfrow=c(1,3))
par(mar = c(.1,0.1,5,0.1)) # Set the margin on all sides to 2
## Parents comparison:
diffAnn_PAR = annotateWithGeneParts(as(DMSlist$DMS_15pc_G1_C_T,"GRanges"),annotBed12)
diffAnn_PAR
## promoter exon intron intergenic
## 8.58 15.84 34.13 45.72
## promoter exon intron intergenic
## 8.58 13.19 32.51 45.72
## promoter exon intron
## 1.07 0.19 0.44
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3 3020 8128 15141 19226 300247
genomation::plotTargetAnnotation(diffAnn_PAR,precedence=TRUE, main="DMS G1", cex.legend = 1, border="white")
## Offspring from control parents comparison:
diffAnn_G2_controlG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_CC_CT,"GRanges"),annotBed12)
diffAnn_G2_controlG1
## promoter exon intron intergenic
## 11.28 21.05 30.16 44.44
## promoter exon intron intergenic
## 11.28 16.21 28.07 44.44
## promoter exon intron
## 0.38 0.07 0.14
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 11 2602 6295 14212 15906 261017
genomation::plotTargetAnnotation(diffAnn_G2_controlG1,precedence=TRUE, main="DMS G2-G1c", cex.legend = 1, border="white")
## Offspring from infected parents comparison:
diffAnn_G2_infectedG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_TC_TT,"GRanges"),annotBed12)
diffAnn_G2_infectedG1
## promoter exon intron intergenic
## 12.90 13.62 34.64 46.81
## promoter exon intron intergenic
## 12.90 9.42 30.87 46.81
## promoter exon intron
## 0.28 0.03 0.08
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9 2355 7150 12285 14773 109527
genomation::plotTargetAnnotation(diffAnn_G2_infectedG1,precedence=TRUE, main="DMS G2-G1i", cex.legend = 1, border="white")
par(mfrow=c(1,1))
## Run the function to get DMS info
DMS_info_G1 <- myDMSinfo(DMSlist$DMS_15pc_G1_C_T, uniteCov6_G1_woSexAndUnknowChrOVERLAP)
DMS_info_G2_G1c_final <- myDMSinfo(DMSlist$DMS_15pc_CC_CT, uniteCov14_G2_woSexAndUnknowChrOVERLAP)
DMS_info_G2_G1i_final <- myDMSinfo(DMSlist$DMS_15pc_TC_TT,uniteCov14_G2_woSexAndUnknowChrOVERLAP)
NB Kostas’ results: “We found a total of 1,973 CpG sites out of 1,172,887 CpGs (0.17%) across the genome that showed at least 15% differential fractional methylation (differentially methylated site [DMS]; q < 0.01) between infected and uninfected fish”
Here: we obtain out a total of 1001880 CpG sites: 3648 (0.36%) showed at least 15% differential fractional methylation (differentially methylated site [DMS]; q < 0.01) between infected and uninfected fish in the parental group; 1197 (0.12%) in the offspring from control parents comparison; 690 (0.07%) in the offspring from infected parents comparison.
## Chi2 test: are the number of DMS from G2-G1C and G2-G1I overlapping with DMSpar statistically different?
A=length(intersect(DMS_info_G1$DMS,DMS_info_G2_G1c_final$DMS))
B=length(DMS_info_G2_G1c_final$DMS)
C=length(intersect(DMS_info_G1$DMS,DMS_info_G2_G1i_final$DMS))
D=length(DMS_info_G2_G1i_final$DMS)
Observed=matrix(c(A, B-A, C, D-C),nrow=2)
Observed
## [,1] [,2]
## [1,] 106 59
## [2,] 1091 631
chisq.test(Observed)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: Observed
## X-squared = 0.019909, df = 1, p-value = 0.8878
## not statistically different
## output Venn diagrams
allVenn <- ggVennDiagram(list("DMS G1" = DMS_info_G1$DMS, "DMS G2-c" = DMS_info_G2_G1c_final$DMS, "DMS G2-i" = DMS_info_G2_G1i_final$DMS), label_alpha = 0) +
scale_fill_gradient(low="white",high = "red")
hypoVenn <- ggVennDiagram(list("DMS G1\nhypo" = DMS_info_G1$DMS[DMS_info_G1$direction %in% "hypo"],
"DMS G2-c\nhypo" = DMS_info_G2_G1c_final$DMS[DMS_info_G2_G1c_final$direction %in% "hypo"],
"DMS G2-i\nhypo" = DMS_info_G2_G1i_final$DMS[DMS_info_G2_G1i_final$direction %in% "hypo"]), label_alpha = 0) +
scale_fill_gradient(low="white",high = "red")
hyperVenn <- ggVennDiagram(list("DMS G1\nhyper" = DMS_info_G1$DMS[DMS_info_G1$direction %in% "hyper"],
"DMS G2-c\nhyper" = DMS_info_G2_G1c_final$DMS[DMS_info_G2_G1c_final$direction %in% "hyper"],
"DMS G2-i\nhyper" = DMS_info_G2_G1i_final$DMS[DMS_info_G2_G1i_final$direction %in% "hyper"]), label_alpha = 0) +
scale_fill_gradient(low="white",high = "red")
ggarrange(allVenn,
ggarrange(hypoVenn, hyperVenn, ncol = 2, legend = "none"),
nrow = 2, widths = c(.5,1))
runHyperHypoAnnot <- function(){
par(mfrow=c(2,3))
par(mar = c(.1,0.1,5,0.1)) # Set the margin on all sides to 2
####### HYPO
## Parents comparison:
A = annotateWithGeneParts(
as(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hypo",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(A,precedence=TRUE, main="DMS G1\nhypo",
cex.legend = .4, border="white")
## Offspring from control parents comparison:
B = annotateWithGeneParts(
as(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hypo",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(B,precedence=TRUE, main="DMS G2-G1c\nhypo",
cex.legend = .4, border="white")
## Offspring from infected parents comparison:
C = annotateWithGeneParts(
as(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hypo",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(C,precedence=TRUE, main="DMS G2-G1i\nhypo",
cex.legend = .4, border="white")
####### HYPER
## Parents comparison:
D = annotateWithGeneParts(
as(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hyper",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(D,precedence=TRUE, main="DMS G1\nhyper",
cex.legend = .4, border="white")
## Offspring from control parents comparison:
E = annotateWithGeneParts(
as(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hyper",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(E,precedence=TRUE, main="DMS G2-G1c\nhyper",
cex.legend = .4, border="white")
## Offspring from infected parents comparison:
f = annotateWithGeneParts(
as(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hyper",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(f,precedence=TRUE, main="DMS G2-G1i\nhyper",
cex.legend = .4, border="white")
par(mfrow=c(1,1))
return(list(G1hypo=A, G2G1chypo=B, G2G1ihypo=C, G1hyper=D, G2G1chyper=E, G2G1ihyper=f))
}
myannot=runHyperHypoAnnot()
############################################################
## Venn diagram of overlapping features by their annotation:
table(rowSums(as.data.frame(myannot$G1hypo@members))) # NB: some positions are labelled with several features!
##
## 0 1 2 3
## 559 566 49 2
## as in MBE 2021: "giving precedence to the following order promoters, exons,
## introns, and intergenic regions when features overlapped"
myAnnotateDMS <- function(DMS, annot){
## sanity check
if (nrow(DMS) != nrow(annot)){"STOP error in arguments"}
DMS$pos <- paste(DMS$chr, DMS$start, DMS$end)
## NB as in MBE 2021: "giving precedence to the following order promoters, exons,
## introns, and intergenic regions when features overlapped"
DMS$feature <- NA
## 1. promoters
DMS$feature[which(annot$prom == 1)] = "promoter"
## 2. exons
DMS$feature[which(annot$exon == 1 & annot$prom ==0)] = "exon"
## 3. intron
DMS$feature[which(annot$intro == 1 & annot$exon == 0 & annot$prom ==0)] = "intron"
## 4. intergenic regions
DMS$feature[which(annot$intro == 0 & annot$exon == 0 & annot$prom ==0)] = "intergenic"
return(DMS)
}
DMSlist$DMS_15pc_G1_C_T = myAnnotateDMS(DMSlist$DMS_15pc_G1_C_T, as.data.frame(diffAnn_PAR@members))
DMSlist$DMS_15pc_G1_C_T_HYPO = myAnnotateDMS(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hypo",],
as.data.frame(myannot$G1hypo@members))
DMSlist$DMS_15pc_G1_C_T_HYPER = myAnnotateDMS(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hyper",],
as.data.frame(myannot$G1hyper@members))
DMSlist$DMS_15pc_CC_CT = myAnnotateDMS(DMSlist$DMS_15pc_CC_CT, as.data.frame(diffAnn_G2_controlG1@members))
DMSlist$DMS_15pc_CC_CT_HYPO = myAnnotateDMS(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hypo",],
as.data.frame(myannot$G2G1chypo@members))
DMSlist$DMS_15pc_CC_CT_HYPER = myAnnotateDMS(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hyper",],
as.data.frame(myannot$G2G1chyper@members))
DMSlist$DMS_15pc_TC_TT = myAnnotateDMS(DMSlist$DMS_15pc_TC_TT, as.data.frame(diffAnn_G2_infectedG1@members))
DMSlist$DMS_15pc_TC_TT_HYPO = myAnnotateDMS(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hypo",],
as.data.frame(myannot$G2G1ihypo@members))
DMSlist$DMS_15pc_TC_TT_HYPER = myAnnotateDMS(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hyper",],
as.data.frame(myannot$G2G1ihyper@members))
## Make Venn diagram for each feature
getFeatureDFHYPO <- function(myfeat){
a = DMSlist$DMS_15pc_G1_C_T_HYPO$pos[DMSlist$DMS_15pc_G1_C_T_HYPO$feature %in% myfeat]
b = DMSlist$DMS_15pc_CC_CT_HYPO$pos[DMSlist$DMS_15pc_CC_CT_HYPO$feature %in% myfeat]
c = DMSlist$DMS_15pc_TC_TT_HYPO$pos[DMSlist$DMS_15pc_TC_TT_HYPO$feature %in% myfeat]
return(list(a=a,b=b,c=c))
}
getFeatureDFHYPER <- function(myfeat){
a = DMSlist$DMS_15pc_G1_C_T_HYPER$pos[DMSlist$DMS_15pc_G1_C_T_HYPER$feature %in% myfeat]
b = DMSlist$DMS_15pc_CC_CT_HYPER$pos[DMSlist$DMS_15pc_CC_CT_HYPER$feature %in% myfeat]
c = DMSlist$DMS_15pc_TC_TT_HYPER$pos[DMSlist$DMS_15pc_TC_TT_HYPER$feature %in% myfeat]
return(list(a=a,b=b,c=c))
}
getVenn <- function(feat, direction){
if (direction == "hypo"){
ggVennDiagram(list(A = getFeatureDFHYPO(feat)[["a"]],
B = getFeatureDFHYPO(feat)[["b"]],
C = getFeatureDFHYPO(feat)[["c"]]), label_alpha = 0,
category.names = c(paste0("DMS G1\nhypo\n", feat), paste0("DMS G2-c\nhypo\n", feat), paste0("DMS G2-i\nhypo\n", feat))) +
scale_fill_gradient(low="white",high = "red")
} else if (direction == "hyper"){
ggVennDiagram(list(A = getFeatureDFHYPER(feat)[["a"]],
B = getFeatureDFHYPER(feat)[["b"]],
C = getFeatureDFHYPER(feat)[["c"]]), label_alpha = 0,
category.names = c(paste0("DMS G1\nhyper\n", feat), paste0("DMS G2-c\nhyper\n", feat), paste0("DMS G2-i\nhyper\n", feat))) +
scale_fill_gradient(low="white",high = "red")
}
}
ggarrange(getVenn("promoter", "hypo"), getVenn("exon", "hypo"),
getVenn("intron", "hypo"), getVenn("intergenic", "hypo"),
nrow = 2, ncol = 2)
ggarrange(getVenn("promoter", "hyper"), getVenn("exon", "hyper"),
getVenn("intron", "hyper"), getVenn("intergenic", "hyper"),
nrow = 2, ncol = 2)
## Parents trt-ctrl
# load annotation
annot_PAR <- as.data.frame(diffAnn_PAR@members)
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_G1_C_T, annotFile = annot_PAR, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G1 DMS")
## G2-G1c trt-ctrl
# load annotation
annot_G2_G1c <- as.data.frame(diffAnn_G2_controlG1@members)
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_CC_CT, annotFile = annot_G2_G1c, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1c DMS")
## G2-G1i trt-ctrl
# load annotation
annot_G2_G1i <- as.data.frame(diffAnn_G2_infectedG1@members)
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_TC_TT, annotFile = annot_G2_G1i, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1i DMS")
## Outliers in Manhattan plot: 15% diff + 2SD
outliers_G1_final <- which(abs(DMSlist$DMS_15pc_G1_C_T$meth.diff) > 15 + 2*sd(abs(DMSlist$DMS_15pc_G1_C_T$meth.diff)))
outliers_annot_G1 <- as.data.frame(diffAnn_PAR@members)[outliers_G1_final,]
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_G1_C_T[outliers_G1_final, ],
annotFile = outliers_annot_G1, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G1 DMS")
outliers_G2_G1c_final <- which(abs(DMSlist$DMS_15pc_CC_CT$meth.diff) > 15 + 2*sd(abs(DMSlist$DMS_15pc_CC_CT$meth.diff)))
outliers_annot_G2_G1c <- as.data.frame(diffAnn_G2_controlG1@members)[outliers_G2_G1c_final,]
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_CC_CT[outliers_G2_G1c_final, ],
annotFile = outliers_annot_G2_G1c, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1c DMS")
outliers_G2_G1i_final <- which(abs(DMSlist$DMS_15pc_TC_TT$meth.diff) > 15 + 2*sd(abs(DMSlist$DMS_15pc_TC_TT$meth.diff)))
outliers_annot_G2_G1i <- as.data.frame(diffAnn_G2_infectedG1@members)[outliers_G2_G1i_final,]
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_TC_TT[outliers_G2_G1i_final, ],
annotFile = outliers_annot_G2_G1i, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1i DMS")
Question: how are the beta values in the different groups at the parental DMS?
##############
## Prepare dataset
PM_G1 <- getPMdataset(uniteCov = uniteCov6_G1_woSexAndUnknowChrOVERLAP, MD = fullMetadata_PAR, gener="parents")
PM_G2 <- getPMdataset(uniteCov = uniteCov14_G2_woSexAndUnknowChrOVERLAP, MD = fullMetadata_OFFS, gener="offspring")
Test of VCA: variance component analysis https://cran.r-project.org/web/packages/VCA/vignettes/VCA_package_vignette.html
PM_G2_mean_hypo <- PM_G2[PM_G2$hypohyper %in% "hypo", ] %>%
group_by(brotherPairID, G1_trt, G2_trt, ID) %>%
dplyr::summarize(MeanBetaValue = mean(BetaValue, na.rm=TRUE)) %>% data.frame()
varPlot(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hypo,
MeanLine=list(var=c("G1_trt", "G2_trt"),
col=c("white", "blue"), lwd=c(2,2)),
BG=list(var="G2_trt", col=paste0("gray", c(80, 90))),
YLabel=list(cex = .8, text="Mean beta value at parDMS \n hypomethylated upon infection"))
myfitVCA_hypo <- fitVCA(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hypo)
### Real values
trtEffect <- sum(myfitVCA_hypo$aov.tab[2:4, 5])
genEffect <- sum(myfitVCA_hypo$aov.tab[5:8, 5])
error <- sum(myfitVCA_hypo$aov.tab[9, 5])
realValHypoVCA <- data.frame(trtEffect=trtEffect, genEffect=genEffect,error=error)
### Randomisation
myrandomVCA <- function(df=PM_G2_mean_hypo){
randomDF = df
randomDF$G1_trt = sample(PM_G2_mean_hypo$G1_trt, replace = F)
randomDF$G2_trt = sample(PM_G2_mean_hypo$G2_trt, replace = F)
randomDF$brotherPairID = sample(PM_G2_mean_hypo$brotherPairID, replace = F)
myfitVCA <- fitVCA(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = randomDF)
trtEffect <- sum(myfitVCA$aov.tab[2:4, 5])
genEffect <- sum(myfitVCA$aov.tab[5:8, 5])
error <- sum(myfitVCA$aov.tab[9, 5])
return(data.frame(trtEffect=trtEffect, genEffect=genEffect,error=error))
}
# randomHypoVCA = do.call(rbind, lapply(1:1000, function(x) {
# df=myrandomVCA(PM_G2_mean_hypo)
# df$rep=x
# return(df)}))
# randomHypoVCA = melt(randomHypoVCA, id.vars = "rep")
# saveRDS(randomHypoVCA, file = "Rdata/randomHypoVCA.RDS")
randomHypoVCA <- readRDS(file = "Rdata/randomHypoVCA.RDS")
df2=reshape2::melt(realValHypoVCA)
sumDF <- randomHypoVCA %>%
group_by(variable) %>%
dplyr::summarize(value = mean(value)) %>% data.frame()
ggplot(randomHypoVCA, aes(x=variable, y=value))+
geom_boxplot()+
geom_jitter(width=.1, alpha=.2)+
geom_point(data = df2, col = "red", size = 6)+
geom_text(data=sumDF, aes(label=round(value)), col="white")+
geom_text(data = df2, aes(label=round(value)), col="white")+
theme_cleveland()+
ggtitle("VCA with bootstrap N=1000 at hypo-parDMS", subtitle = "red: observed values")
# estimate 95% confidence intervals, request CI for
# all variance components via 'VarVC=TRUE'
VCAinference(myfitVCA_hypo, VarVC=TRUE)
##
##
##
## Inference from (V)ariance (C)omponent (A)nalysis
## ------------------------------------------------
##
## > VCA Result:
## -------------
##
## Name DF SS MS VC %Total SD
## 1 total 6.6733 15.8572 100 3.9821
## 2 G1_trt 1 319.6035 319.6035 4.7515 29.9646 2.1798
## 3 G2_trt 1 3.8987 3.8987 0.0712 0.4491 0.2669
## 4 G1_trt:G2_trt 1 1.5539 1.5539 0* 0* 0*
## 5 brotherPairID 7 129.185 18.455 0* 0* 0*
## 6 G1_trt:brotherPairID 7 395.9735 56.5676 7.8576 49.5525 2.8031
## 7 G2_trt:brotherPairID 7 9.9214 1.4173 0* 0* 0*
## 8 G1_trt:G2_trt:brotherPairID 7 20.2165 2.8881 0* 0* 0*
## 9 error 79 250.9663 3.1768 3.1768 20.0337 1.7824
## CV[%] Var(VC)
## 1 6.6047
## 2 3.6154 66.6443
## 3 0.4426 0.0124
## 4 0* 0.0096
## 5 0* 5.4751
## 6 4.6493 19.7869
## 7 0* 0.068
## 8 0* 0.2383
## 9 2.9562 0.2555
##
## Mean: 60.2922 (N = 111)
##
## Experimental Design: unbalanced | Method: ANOVA | * VC set to 0 | adapted MS used for total DF
##
##
## > VC:
## -----
## Estimate CI LCL CI UCL One-Sided LCL One-Sided UCL
## total 15.8572 6.824 68.824 7.787 53.1883
## G1_trt 4.7515 0* 20.7519 0* 18.1795
## G2_trt 0.0712 0* 0.2897 0* 0.2545
## G1_trt:G2_trt 0
## brotherPairID 0
## G1_trt:brotherPairID 7.8576 0* 16.576 0.5409 15.1744
## G2_trt:brotherPairID 0
## G1_trt:G2_trt:brotherPairID 0
## error 3.1768 2.3794 4.457 2.491 4.2163
##
## > SD:
## -----
## Estimate CI LCL CI UCL One-Sided LCL One-Sided UCL
## total 3.9821 2.6123 8.296 2.7905 7.293
## G1_trt 2.1798 0* 4.5554 0* 4.2637
## G2_trt 0.2669 0* 0.5382 0* 0.5045
## G1_trt:G2_trt 0
## brotherPairID 0
## G1_trt:brotherPairID 2.8031 0* 4.0714 0.7355 3.8954
## G2_trt:brotherPairID 0
## G1_trt:G2_trt:brotherPairID 0
## error 1.7824 1.5425 2.1112 1.5783 2.0534
##
## > CV[%]:
## --------
## Estimate CI LCL CI UCL One-Sided LCL One-Sided UCL
## total 6.6047 4.3327 13.7597 4.6283 12.0961
## G1_trt 3.6154 0* 7.5556 0* 7.0718
## G2_trt 0.4426 0* 0.8926 0* 0.8368
## G1_trt:G2_trt 0
## brotherPairID 0
## G1_trt:brotherPairID 4.6493 0* 6.7527 1.2199 6.4609
## G2_trt:brotherPairID 0
## G1_trt:G2_trt:brotherPairID 0
## error 2.9562 2.5584 3.5015 2.6177 3.4057
##
##
## 95% Confidence Level | CIs for negative VCs excluded | * CI-limits constrained to be >= 0
## SAS PROC MIXED method used for computing CIs
PM_G2_mean_hyper <- PM_G2[PM_G2$hypohyper %in% "hyper", ] %>%
group_by(brotherPairID, G1_trt, G2_trt, ID) %>%
dplyr::summarize(MeanBetaValue = mean(BetaValue, na.rm=TRUE)) %>% data.frame()
varPlot(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hyper,
MeanLine=list(var=c("G1_trt", "G2_trt"),
col=c("white", "blue"), lwd=c(2,2)),
BG=list(var="G2_trt", col=paste0("gray", c(80, 90))),
YLabel=list(cex = .8, text="Mean beta value at parDMS \n hypermethylated upon infection"))
myfitVCA_hyper <- fitVCA(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hyper)
### Real values
trtEffect <- sum(myfitVCA_hyper$aov.tab[2:4, 5])
genEffect <- sum(myfitVCA_hyper$aov.tab[5:8, 5])
error <- sum(myfitVCA_hyper$aov.tab[9, 5])
realValHyperVCA <- data.frame(trtEffect=trtEffect, genEffect=genEffect,error=error)
### Randomisation
# randomHyperVCA = do.call(rbind, lapply(1:1000, function(x) {
# df=myrandomVCA(PM_G2_mean_hyper)
# df$rep=x
# return(df)}))
#
# randomHyperVCA = melt(randomHyperVCA, id.vars = "rep")
# saveRDS(randomHyperVCA, file = "Rdata/randomHyperVCA.RDS")
randomHyperVCA <- readRDS(file = "Rdata/randomHyperVCA.RDS")
df2=reshape2::melt(realValHyperVCA)
sumDF <- randomHyperVCA %>%
group_by(variable) %>%
dplyr::summarize(value = mean(value)) %>% data.frame()
ggplot(randomHyperVCA, aes(x=variable, y=value))+
geom_boxplot()+
geom_jitter(width=.1, alpha=.2)+
geom_point(data = df2, col = "red", size = 6)+
geom_text(data=sumDF, aes(label=round(value)), col="white")+
geom_text(data = df2, aes(label=round(value)), col="white")+
theme_cleveland()+
ggtitle("VCA with bootstrap N=1000 at hyper-parDMS", subtitle = "red: observed values")
# estimate 95% confidence intervals, request CI for
# all variance components via 'VarVC=TRUE'
VCAinference(myfitVCA_hypo, VarVC=TRUE)
##
##
##
## Inference from (V)ariance (C)omponent (A)nalysis
## ------------------------------------------------
##
## > VCA Result:
## -------------
##
## Name DF SS MS VC %Total SD
## 1 total 6.6733 15.8572 100 3.9821
## 2 G1_trt 1 319.6035 319.6035 4.7515 29.9646 2.1798
## 3 G2_trt 1 3.8987 3.8987 0.0712 0.4491 0.2669
## 4 G1_trt:G2_trt 1 1.5539 1.5539 0* 0* 0*
## 5 brotherPairID 7 129.185 18.455 0* 0* 0*
## 6 G1_trt:brotherPairID 7 395.9735 56.5676 7.8576 49.5525 2.8031
## 7 G2_trt:brotherPairID 7 9.9214 1.4173 0* 0* 0*
## 8 G1_trt:G2_trt:brotherPairID 7 20.2165 2.8881 0* 0* 0*
## 9 error 79 250.9663 3.1768 3.1768 20.0337 1.7824
## CV[%] Var(VC)
## 1 6.6047
## 2 3.6154 66.6443
## 3 0.4426 0.0124
## 4 0* 0.0096
## 5 0* 5.4751
## 6 4.6493 19.7869
## 7 0* 0.068
## 8 0* 0.2383
## 9 2.9562 0.2555
##
## Mean: 60.2922 (N = 111)
##
## Experimental Design: unbalanced | Method: ANOVA | * VC set to 0 | adapted MS used for total DF
##
##
## > VC:
## -----
## Estimate CI LCL CI UCL One-Sided LCL One-Sided UCL
## total 15.8572 6.824 68.824 7.787 53.1883
## G1_trt 4.7515 0* 20.7519 0* 18.1795
## G2_trt 0.0712 0* 0.2897 0* 0.2545
## G1_trt:G2_trt 0
## brotherPairID 0
## G1_trt:brotherPairID 7.8576 0* 16.576 0.5409 15.1744
## G2_trt:brotherPairID 0
## G1_trt:G2_trt:brotherPairID 0
## error 3.1768 2.3794 4.457 2.491 4.2163
##
## > SD:
## -----
## Estimate CI LCL CI UCL One-Sided LCL One-Sided UCL
## total 3.9821 2.6123 8.296 2.7905 7.293
## G1_trt 2.1798 0* 4.5554 0* 4.2637
## G2_trt 0.2669 0* 0.5382 0* 0.5045
## G1_trt:G2_trt 0
## brotherPairID 0
## G1_trt:brotherPairID 2.8031 0* 4.0714 0.7355 3.8954
## G2_trt:brotherPairID 0
## G1_trt:G2_trt:brotherPairID 0
## error 1.7824 1.5425 2.1112 1.5783 2.0534
##
## > CV[%]:
## --------
## Estimate CI LCL CI UCL One-Sided LCL One-Sided UCL
## total 6.6047 4.3327 13.7597 4.6283 12.0961
## G1_trt 3.6154 0* 7.5556 0* 7.0718
## G2_trt 0.4426 0* 0.8926 0* 0.8368
## G1_trt:G2_trt 0
## brotherPairID 0
## G1_trt:brotherPairID 4.6493 0* 6.7527 1.2199 6.4609
## G2_trt:brotherPairID 0
## G1_trt:G2_trt:brotherPairID 0
## error 2.9562 2.5584 3.5015 2.6177 3.4057
##
##
## 95% Confidence Level | CIs for negative VCs excluded | * CI-limits constrained to be >= 0
## SAS PROC MIXED method used for computing CIs
parmod <- lmer(data = PM_G1, BetaValue ~ meth.diff.parentals : Treatment + (1|CpGSite) + (1|brotherPairID))
## check normality of residuals assumption
qqnorm(resid(parmod))
qqline(resid(parmod))
pred <- ggpredict(parmod, terms = c("meth.diff.parentals", "Treatment"))
plot(pred, add.data = T)+
scale_color_manual(values = c("black", "red"))+
scale_y_continuous(name = "Beta values")+
scale_x_continuous(name = "Methylation difference between infected and control parents in percentage")+
ggtitle("Predicted methylation ratio (Beta) values in parents\n as a function of differential methylation between exposed and control groups")+
theme_bw()
modFull <- lmer(BetaValue ~ (G1_trt * G2_trt):hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID),data = PM_G2, REML = F) # REML =F for model comparison
mod_noG1trt <- lmer(BetaValue ~ G2_trt:hypohyper + (1|CpGSite)+ (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
mod_noG2trt <-lmer(BetaValue ~ G1_trt:hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
mod_noInteractions <- lmer(BetaValue ~ (G1_trt + G2_trt):hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
mod_noHypoHyper <- lmer(BetaValue ~ (G1_trt * G2_trt) + (1|CpGSite) + (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
## check normality of residuals assumption
qqnorm(resid(modFull))
qqline(resid(modFull))
## Likelihood ratio tests for all variables:
lrtest(modFull, mod_noG1trt) # G1 trt is VERY VERY significant (LRT: χ² (4) = 1163.6, p < 0.001)
## Likelihood ratio test
##
## Model 1: BetaValue ~ (G1_trt * G2_trt):hypohyper + (1 | CpGSite) + (1 |
## Sex) + (1 | brotherPairID)
## Model 2: BetaValue ~ G2_trt:hypohyper + (1 | CpGSite) + (1 | Sex) + (1 |
## brotherPairID)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 12 -1515719
## 2 8 -1516301 -4 1163.6 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lrtest(modFull, mod_noG2trt) # G2 trt is VERY VERY significant (LRT: χ² (4) = 30.02, p < 0.001) NB that changed when brotherpair is used instead of family!
## Likelihood ratio test
##
## Model 1: BetaValue ~ (G1_trt * G2_trt):hypohyper + (1 | CpGSite) + (1 |
## Sex) + (1 | brotherPairID)
## Model 2: BetaValue ~ G1_trt:hypohyper + (1 | CpGSite) + (1 | Sex) + (1 |
## brotherPairID)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 12 -1515719
## 2 8 -1515734 -4 30.022 4.844e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lrtest(modFull, mod_noInteractions) # interactions are significant (LRT: χ² (2) = 9.21, p < 0.01)
## Likelihood ratio test
##
## Model 1: BetaValue ~ (G1_trt * G2_trt):hypohyper + (1 | CpGSite) + (1 |
## Sex) + (1 | brotherPairID)
## Model 2: BetaValue ~ (G1_trt + G2_trt):hypohyper + (1 | CpGSite) + (1 |
## Sex) + (1 | brotherPairID)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 12 -1515719
## 2 10 -1515724 -2 9.211 0.009997 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lrtest(modFull, mod_noHypoHyper) # hypo/hyper VERY VERY significant (LRT: χ² (4) = 1140, p < 0.001)
## Likelihood ratio test
##
## Model 1: BetaValue ~ (G1_trt * G2_trt):hypohyper + (1 | CpGSite) + (1 |
## Sex) + (1 | brotherPairID)
## Model 2: BetaValue ~ (G1_trt * G2_trt) + (1 | CpGSite) + (1 | Sex) + (1 |
## brotherPairID)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 12 -1515719
## 2 8 -1516289 -4 1140.4 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Post-hoc tests between treatments
modFull <- lmer(BetaValue ~ (G1_trt * G2_trt):hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID),data = PM_G2)
modFull_emmeans <- emmeans(modFull, list(pairwise ~ (G1_trt:G2_trt):hypohyper), adjust = "tukey")
modFull_emmeans
## $`emmeans of G1_trt, G2_trt, hypohyper`
## G1_trt G2_trt hypohyper emmean SE df asymp.LCL asymp.UCL
## Control Control hypo 62.1 0.960 Inf 60.2 64.0
## Exposed Control hypo 58.8 0.959 Inf 57.0 60.7
## Control Exposed hypo 62.3 0.960 Inf 60.4 64.2
## Exposed Exposed hypo 58.9 0.960 Inf 57.1 60.8
## Control Control hyper 56.6 0.872 Inf 54.9 58.3
## Exposed Control hyper 58.7 0.872 Inf 57.0 60.4
## Control Exposed hyper 55.8 0.872 Inf 54.1 57.6
## Exposed Exposed hyper 58.6 0.872 Inf 56.9 60.3
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
##
## $`pairwise differences of G1_trt, G2_trt, hypohyper`
## contrast estimate SE df z.ratio
## Control Control hypo - Exposed Control hypo 3.2670 0.199 Inf 16.439
## Control Control hypo - Control Exposed hypo -0.2024 0.205 Inf -0.989
## Control Control hypo - Exposed Exposed hypo 3.1756 0.202 Inf 15.735
## Control Control hypo - Control Control hyper 5.5292 0.672 Inf 8.226
## Control Control hypo - Exposed Control hyper 3.3925 0.672 Inf 5.049
## Control Control hypo - Control Exposed hyper 6.2691 0.673 Inf 9.317
## Control Control hypo - Exposed Exposed hyper 3.5420 0.672 Inf 5.268
## Exposed Control hypo - Control Exposed hypo -3.4694 0.202 Inf -17.160
## Exposed Control hypo - Exposed Exposed hypo -0.0913 0.199 Inf -0.460
## Exposed Control hypo - Control Control hyper 2.2623 0.671 Inf 3.369
## Exposed Control hypo - Exposed Control hyper 0.1256 0.671 Inf 0.187
## Exposed Control hypo - Control Exposed hyper 3.0021 0.672 Inf 4.467
## Exposed Control hypo - Exposed Exposed hyper 0.2751 0.671 Inf 0.410
## Control Exposed hypo - Exposed Exposed hypo 3.3780 0.205 Inf 16.479
## Control Exposed hypo - Control Control hyper 5.7317 0.673 Inf 8.514
## Control Exposed hypo - Exposed Control hyper 3.5950 0.673 Inf 5.342
## Control Exposed hypo - Control Exposed hyper 6.4715 0.674 Inf 9.608
## Control Exposed hypo - Exposed Exposed hyper 3.7444 0.673 Inf 5.561
## Exposed Exposed hypo - Control Control hyper 2.3536 0.672 Inf 3.501
## Exposed Exposed hypo - Exposed Control hyper 0.2169 0.672 Inf 0.323
## Exposed Exposed hypo - Control Exposed hyper 3.0935 0.673 Inf 4.597
## Exposed Exposed hypo - Exposed Exposed hyper 0.3664 0.672 Inf 0.545
## Control Control hyper - Exposed Control hyper -2.1367 0.137 Inf -15.618
## Control Control hyper - Control Exposed hyper 0.7398 0.141 Inf 5.251
## Control Control hyper - Exposed Exposed hyper -1.9872 0.139 Inf -14.332
## Exposed Control hyper - Control Exposed hyper 2.8765 0.140 Inf 20.557
## Exposed Control hyper - Exposed Exposed hyper 0.1495 0.137 Inf 1.091
## Control Exposed hyper - Exposed Exposed hyper -2.7271 0.141 Inf -19.290
## p.value
## <.0001
## 0.9761
## <.0001
## <.0001
## <.0001
## <.0001
## <.0001
## <.0001
## 0.9998
## 0.0172
## 1.0000
## 0.0002
## 0.9999
## <.0001
## <.0001
## <.0001
## <.0001
## <.0001
## 0.0110
## 1.0000
## 0.0001
## 0.9994
## <.0001
## <.0001
## <.0001
## <.0001
## 0.9589
## <.0001
##
## Degrees-of-freedom method: asymptotic
## P value adjustment: tukey method for comparing a family of 8 estimates
P1 <- plot(modFull_emmeans, by = "hypohyper", comparisons = TRUE) +
# coord_flip()+
theme_bw() +
ggtitle("Estimated marginal means of methylation ratio (beta)\n of offspring at parental DMS")+
theme(legend.position = "none", axis.title.x = element_blank()) +
scale_x_continuous("Beta value (methylation ratio)", limits = c(47,69.5))
## NB: Comparison arrows: https://cran.r-project.org/web/packages/emmeans/vignettes/xplanations.html
## two estimated marginal means (EMMs) differ significantly if, and only if, their respective comparison arrows do not overlap
## These comparison arrows are decidedly not the same as confidence intervals.
## Confidence intervals for EMMs are based on the statistical properties of the individual EMMs, whereas comparison arrows
## are based on the statistical properties of differences of EMMs.
## Add the PARENTAL DMS value
## Same test on ALL, G1 and G2 fish
modFullG1 <- lmer(BetaValue ~ G1_trt:hypohyper + (1|CpGSite) + (1|brotherPairID), data = PM_G1)
modFullG1_emmeans <- emmeans(modFullG1, list(pairwise ~ G1_trt:hypohyper), adjust = "tukey")
modFullG1_emmeans
## $`emmeans of G1_trt, hypohyper`
## G1_trt hypohyper emmean SE df asymp.LCL asymp.UCL
## Control hypo 66.9 0.657 Inf 65.7 68.2
## Exposed hypo 49.1 0.661 Inf 47.8 50.4
## Control hyper 48.6 0.534 Inf 47.6 49.6
## Exposed hyper 65.8 0.536 Inf 64.7 66.8
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
##
## $`pairwise differences of G1_trt, hypohyper`
## 1 estimate SE df z.ratio p.value
## Control hypo - Exposed hypo 17.889 0.303 Inf 59.074 <.0001
## Control hypo - Control hyper 18.343 0.643 Inf 28.539 <.0001
## Control hypo - Exposed hyper 1.179 0.645 Inf 1.829 0.2597
## Exposed hypo - Control hyper 0.454 0.647 Inf 0.701 0.8966
## Exposed hypo - Exposed hyper -16.711 0.649 Inf -25.761 <.0001
## Control hyper - Exposed hyper -17.164 0.209 Inf -82.107 <.0001
##
## Degrees-of-freedom method: asymptotic
## P value adjustment: tukey method for comparing a family of 4 estimates
P2 <- plot(modFullG1_emmeans, by = "hypohyper", comparisons = TRUE) +
theme_bw() +
ggtitle("Estimated marginal means of methylation ratio (beta)\n of parents at DMS")+
theme(legend.position = "none", axis.title.x = element_blank()) +
scale_x_continuous("Beta value (methylation ratio)", limits = c(47,69.5))
ggarrange(P2, P1, labels = c("A", "B"), ncol = 1, nrow = 2)
length(unique(PM_G1$CpGSite))# 3648 positions
## [1] 3648
PM_G1 %>% dplyr::count(ID)## NB: not all covered in all samples
## ID n
## 1 S16 3300
## 2 S17 2842
## 3 S35 2747
## 4 S36 2568
## 5 S52 3037
## 6 S53 3468
## 7 S68 3348
## 8 S69 3500
## 9 S70 2237
## 10 S71 3302
## 11 S72 3479
## 12 S73 2973
## 13 S76 3239
## 14 S77 3232
## 15 S78 3434
## 16 S79 2679
## 17 S94 2564
## 18 S95 1766
## 19 S107 3414
## 20 S108 3230
## 21 S124 3387
## 22 S125 2526
## 23 S143 3097
## 24 S144 2773
length(unique(PM_G2$CpGSite[PM_G2$hypohyper %in% "hypo"]))# 1176 positions hypomethylated upon parental inf
## [1] 1176
length(unique(PM_G2$CpGSite[PM_G2$hypohyper %in% "hyper"]))# 2472 positions hypermethylated upon parental inf
## [1] 2472
myfun <- function(X){
## Calculate nbr of CpG hypo/hypermethylated per individual, and nbr of covered CpG:
X <- X %>% group_by(ID, Treatment, brotherPairID, clutch.ID, Sex) %>%
dplyr::summarise(ncov = n(),
hypoMeth = sum(BetaValue < 0.3),
hyperMeth = sum(BetaValue > 0.7)) %>% data.frame()
# Calculate residuals of nbr of methCpG by nbr of covered CpG
X$res_Nbr_methCpG_Nbr_coveredCpG_HYPO = residuals(lm(X$hypoMeth ~ X$ncov))
X$res_Nbr_methCpG_Nbr_coveredCpG_HYPER = residuals(lm(X$hyperMeth ~ X$ncov))
## Statistical model: is it different in each offspring trt group?
mod1 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X, REML = F)
mod0 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ 1 + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X, REML = F)
print(lrtest(mod1, mod0))
## Post-hoc test:
modhypo <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X)
## pairwise posthoc test
print(emmeans(modhypo, list(pairwise ~ Treatment), adjust = "tukey"))
mod3 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X, REML = F)
mod4 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ 1 + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X, REML = F)
print(lrtest(mod3, mod4))
## Post-hoc test:
modhyper <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X)
## pairwise posthoc test
print(emmeans(modhyper, list(pairwise ~ Treatment), adjust = "tukey"))
## Plot
phypo <- plot(ggpredict(modhypo, terms = c("Treatment")), add.data = TRUE)+
scale_y_continuous("Residuals of number of hypomethylated methylated \ncytosines on number of cytosines covered") +
ggtitle("Predicted residuals nbr of hypomethylated CpG")+
theme_bw()
phyper <- plot(ggpredict(modhyper, terms = c("Treatment")), add.data = TRUE)+
scale_y_continuous("Residuals of number of hypermethylated methylated \n cytosines on number of cytosines covered") +
ggtitle("Predicted residuals nbr of hypermethylated CpG")+
theme_bw()
return(list(phypo, phyper))
}
listplots <- myfun(X = PM_G2[PM_G2$hypohyper %in% "hypo",])
## Likelihood ratio test
##
## Model 1: res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1 | brotherPairID/clutch.ID) +
## (1 | Sex)
## Model 2: res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ 1 + (1 | brotherPairID/clutch.ID) +
## (1 | Sex)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 8 -420.75
## 2 5 -423.55 -3 5.605 0.1325
## $`emmeans of Treatment`
## Treatment emmean SE df lower.CL upper.CL
## NE_control -4.01 4.11 16.2 -12.72 4.70
## NE_exposed -5.64 4.14 16.3 -14.40 3.11
## E_control 7.45 4.12 16.1 -1.29 16.18
## E_exposed 4.61 4.12 16.1 -4.11 13.33
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $`pairwise differences of Treatment`
## 1 estimate SE df t.ratio p.value
## NE_control - NE_exposed 1.63 2.55 93.08 0.640 0.9188
## NE_control - E_control -11.46 5.83 8.47 -1.966 0.2722
## NE_control - E_exposed -8.62 5.82 8.47 -1.482 0.4875
## NE_exposed - E_control -13.09 5.86 8.53 -2.234 0.1893
## NE_exposed - E_exposed -10.25 5.85 8.53 -1.754 0.3559
## E_control - E_exposed 2.84 2.50 92.53 1.135 0.6692
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 4 estimates
##
## Likelihood ratio test
##
## Model 1: res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1 | brotherPairID/clutch.ID) +
## (1 | Sex)
## Model 2: res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ 1 + (1 | brotherPairID/clutch.ID) +
## (1 | Sex)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 8 -419.75
## 2 5 -422.58 -3 5.6616 0.1293
## $`emmeans of Treatment`
## Treatment emmean SE df lower.CL upper.CL
## NE_control 3.97 4.18 16.1 -4.88 12.82
## NE_exposed 5.61 4.20 16.2 -3.29 14.51
## E_control -7.52 4.19 16.0 -16.40 1.35
## E_exposed -4.53 4.18 16.1 -13.39 4.33
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $`pairwise differences of Treatment`
## 1 estimate SE df t.ratio p.value
## NE_control - NE_exposed -1.64 2.51 93.04 -0.651 0.9148
## NE_control - E_control 11.50 5.92 8.37 1.942 0.2815
## NE_control - E_exposed 8.50 5.91 8.37 1.438 0.5112
## NE_exposed - E_control 13.13 5.95 8.43 2.208 0.1971
## NE_exposed - E_exposed 10.14 5.94 8.43 1.707 0.3771
## E_control - E_exposed -3.00 2.47 92.51 -1.216 0.6185
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 4 estimates
## NOT significant
annotate_figure(ggarrange(listplots[[1]], listplots[[2]],ncol = 2, nrow = 1),
top = text_grob("Parental DMS hypomethylated upon infection, in offspring"))
listplots <- myfun(X = PM_G2[PM_G2$hypohyper %in% "hyper",])
## Likelihood ratio test
##
## Model 1: res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1 | brotherPairID/clutch.ID) +
## (1 | Sex)
## Model 2: res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ 1 + (1 | brotherPairID/clutch.ID) +
## (1 | Sex)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 8 -476.63
## 2 5 -482.69 -3 12.123 0.006974 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $`emmeans of Treatment`
## Treatment emmean SE df lower.CL upper.CL
## NE_control 13.94 5.13 17.0 3.12 24.75
## NE_exposed 8.48 5.19 17.2 -2.46 19.42
## E_control -9.77 5.15 16.8 -20.66 1.11
## E_exposed -12.95 5.14 16.9 -23.79 -2.11
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $`pairwise differences of Treatment`
## 1 estimate SE df t.ratio p.value
## NE_control - NE_exposed 5.46 4.45 93.8 1.225 0.6124
## NE_control - E_control 23.71 7.28 10.3 3.257 0.0353
## NE_control - E_exposed 26.88 7.26 10.3 3.701 0.0172
## NE_exposed - E_control 18.25 7.36 10.5 2.481 0.1209
## NE_exposed - E_exposed 21.43 7.33 10.5 2.923 0.0598
## E_control - E_exposed 3.17 4.37 93.0 0.726 0.8864
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 4 estimates
##
## Likelihood ratio test
##
## Model 1: res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1 | brotherPairID/clutch.ID) +
## (1 | Sex)
## Model 2: res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ 1 + (1 | brotherPairID/clutch.ID) +
## (1 | Sex)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 8 -477.24
## 2 5 -483.28 -3 12.081 0.00711 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $`emmeans of Treatment`
## Treatment emmean SE df lower.CL upper.CL
## NE_control -14.11 5.18 17.0 -25.04 -3.18
## NE_exposed -8.53 5.24 17.2 -19.58 2.52
## E_control 9.95 5.21 16.8 -1.05 20.95
## E_exposed 12.96 5.19 16.9 2.00 23.92
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $`pairwise differences of Treatment`
## 1 estimate SE df t.ratio p.value
## NE_control - NE_exposed -5.58 4.47 93.8 -1.247 0.5989
## NE_control - E_control -24.06 7.36 10.3 -3.269 0.0348
## NE_control - E_exposed -27.07 7.34 10.3 -3.687 0.0177
## NE_exposed - E_control -18.48 7.43 10.4 -2.486 0.1204
## NE_exposed - E_exposed -21.49 7.41 10.4 -2.901 0.0622
## E_control - E_exposed -3.01 4.39 93.0 -0.686 0.9021
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 4 estimates
## Treatment SIGNIFICANT in both excess hypo/hyper methylation **
# $`pairwise differences of Treatment`
## HYPO
# 1 estimate SE df t.ratio p.value
# NE_control - E_control 23.71 7.28 10.3 3.257 0.0353
# NE_control - E_exposed 26.88 7.26 10.3 3.701 0.0172
## HYPER
# 1 estimate SE df t.ratio p.value
# NE_control - E_control -24.06 7.36 10.3 -3.269 0.0348
# NE_control - E_exposed -27.07 7.34 10.3 -3.687 0.0177
annotate_figure(ggarrange(listplots[[1]], listplots[[2]],ncol = 2, nrow = 1),
top = text_grob("Parental DMS hypermethylated upon infection, in offspring"))
-> The beta values in parentalDMS in offspring follow the parental pattern hypo/hyper methylated upon infection